• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

hazendaz / sitemesh2 / 59

22 Mar 2026 02:30AM UTC coverage: 40.347%. Remained the same
59

push

github

hazendaz
[mvn] Update maven wrapper

698 of 1891 branches covered (36.91%)

Branch coverage included in aggregate %.

1555 of 3693 relevant lines covered (42.11%)

0.42 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

45.79
/src/main/java/com/opensymphony/module/sitemesh/parser/FastPageParser.java
1
/*
2
 * SPDX-License-Identifier: Apache-2.0
3
 * Copyright 2011-2026 Hazendaz
4
 */
5
/*
6
 * Title:        FastPageParser
7
 * Description:
8
 *
9
 * This software is published under the terms of the OpenSymphony Software
10
 * License version 1.1, of which a copy has been included with this
11
 * distribution in the LICENSE.txt file.
12
 */
13

14
package com.opensymphony.module.sitemesh.parser;
15

16
import com.opensymphony.module.sitemesh.DefaultSitemeshBuffer;
17
import com.opensymphony.module.sitemesh.Page;
18
import com.opensymphony.module.sitemesh.PageParser;
19
import com.opensymphony.module.sitemesh.SitemeshBuffer;
20
import com.opensymphony.module.sitemesh.html.util.CharArray;
21
import com.opensymphony.module.sitemesh.util.CharArrayReader;
22

23
import java.io.IOException;
24
import java.util.Collections;
25
import java.util.HashMap;
26
import java.util.Map;
27

28
/**
29
 * Very fast PageParser implementation for parsing HTML.
30
 * <p>
31
 * Produces FastPage.
32
 *
33
 * @author <a href="mailto:salaman@qoretech.com">Victor Salaman</a>
34
 *
35
 * @deprecated Use HTMLPageParser instead - it performs better and is more extensible.
36
 */
37
@Deprecated
38
public final class FastPageParser implements PageParser {
1✔
39

40
    /** The Constant TOKEN_NONE. */
41
    private static final int TOKEN_NONE = -0;
42

43
    /** The Constant TOKEN_EOF. */
44
    private static final int TOKEN_EOF = -1;
45

46
    /** The Constant TOKEN_TEXT. */
47
    private static final int TOKEN_TEXT = -2;
48

49
    /** The Constant TOKEN_TAG. */
50
    private static final int TOKEN_TAG = -3;
51

52
    /** The Constant TOKEN_COMMENT. */
53
    private static final int TOKEN_COMMENT = -4;
54

55
    /** The Constant TOKEN_CDATA. */
56
    private static final int TOKEN_CDATA = -5;
57

58
    /** The Constant TOKEN_SCRIPT. */
59
    private static final int TOKEN_SCRIPT = -6;
60

61
    /** The Constant TOKEN_DOCTYPE. */
62
    private static final int TOKEN_DOCTYPE = -7;
63

64
    /** The Constant TOKEN_EMPTYTAG. */
65
    private static final int TOKEN_EMPTYTAG = -8;
66

67
    /** The Constant STATE_EOF. */
68
    private static final int STATE_EOF = -1;
69

70
    /** The Constant STATE_TEXT. */
71
    private static final int STATE_TEXT = -2;
72

73
    /** The Constant STATE_TAG. */
74
    private static final int STATE_TAG = -3;
75

76
    /** The Constant STATE_COMMENT. */
77
    private static final int STATE_COMMENT = -4;
78

79
    /** The Constant STATE_TAG_QUOTE. */
80
    private static final int STATE_TAG_QUOTE = -5;
81

82
    /** The Constant STATE_CDATA. */
83
    private static final int STATE_CDATA = -6;
84

85
    /** The Constant STATE_SCRIPT. */
86
    private static final int STATE_SCRIPT = -7;
87

88
    /** The Constant STATE_DOCTYPE. */
89
    private static final int STATE_DOCTYPE = -8;
90

91
    /** The Constant TAG_STATE_NONE. */
92
    private static final int TAG_STATE_NONE = 0;
93

94
    /** The Constant TAG_STATE_HTML. */
95
    private static final int TAG_STATE_HTML = -1;
96

97
    /** The Constant TAG_STATE_HEAD. */
98
    private static final int TAG_STATE_HEAD = -2;
99

100
    /** The Constant TAG_STATE_TITLE. */
101
    private static final int TAG_STATE_TITLE = -3;
102

103
    /** The Constant TAG_STATE_BODY. */
104
    private static final int TAG_STATE_BODY = -4;
105

106
    /** The Constant TAG_STATE_XML. */
107
    private static final int TAG_STATE_XML = -6;
108

109
    /** The Constant TAG_STATE_XMP. */
110
    private static final int TAG_STATE_XMP = -7;
111

112
    // These hashcodes are hardcoded because swtich statements can only
113
    // switch on compile-time constants.
114
    // In theory it is possible for there to be a hashcode collision with
115
    // other HTML tags, however in practice it is *very* unlikely because
116
    // tags are generally only a few characters long and hence are likely
117
    // to produce unique values.
118

119
    /** The Constant SLASH_XML_HASH. */
120
    private static final int SLASH_XML_HASH = 1518984; // "/xml".hashCode();
121

122
    /** The Constant XML_HASH. */
123
    private static final int XML_HASH = 118807; // "xml".hashCode();
124

125
    /** The Constant SLASH_XMP_HASH. */
126
    private static final int SLASH_XMP_HASH = 1518988; // "/xmp".hashCode();
127

128
    /** The Constant XMP_HASH. */
129
    private static final int XMP_HASH = 118811; // "xmp".hashCode();
130

131
    /** The Constant HTML_HASH. */
132
    private static final int HTML_HASH = 3213227; // "html".hashCode();
133

134
    /** The Constant SLASH_HTML_HASH. */
135
    private static final int SLASH_HTML_HASH = 46618714; // "/html".hashCode();
136

137
    /** The Constant HEAD_HASH. */
138
    private static final int HEAD_HASH = 3198432; // "head".hashCode();
139

140
    /** The Constant TITLE_HASH. */
141
    private static final int TITLE_HASH = 110371416; // "title".hashCode();
142

143
    /** The Constant SLASH_TITLE_HASH. */
144
    private static final int SLASH_TITLE_HASH = 1455941513; // "/title".hashCode();
145

146
    /** The Constant PARAMETER_HASH. */
147
    private static final int PARAMETER_HASH = 1954460585; // "parameter".hashCode();
148

149
    /** The Constant META_HASH. */
150
    private static final int META_HASH = 3347973; // "meta".hashCode();
151

152
    /** The Constant SLASH_HEAD_HASH. */
153
    private static final int SLASH_HEAD_HASH = 46603919; // "/head".hashCode();
154

155
    /** The Constant FRAMESET_HASH. */
156
    private static final int FRAMESET_HASH = -1644953643; // "frameset".hashCode();
157

158
    /** The Constant FRAME_HASH. */
159
    private static final int FRAME_HASH = 97692013; // "frame".hashCode();
160

161
    /** The Constant BODY_HASH. */
162
    private static final int BODY_HASH = 3029410; // "body".hashCode();
163

164
    /** The Constant SLASH_BODY_HASH. */
165
    private static final int SLASH_BODY_HASH = 46434897; // "/body".hashCode();
166

167
    /** The Constant CONTENT_HASH. */
168
    private static final int CONTENT_HASH = 951530617; // "content".hashCode();
169

170
    @Override
171
    public Page parse(char[] buffer) throws IOException {
172
        return parse(new DefaultSitemeshBuffer(buffer));
×
173
    }
174

175
    @Override
176
    public Page parse(SitemeshBuffer buffer) throws IOException {
177
        CharArrayReader reader = new CharArrayReader(buffer.getCharArray(), 0, buffer.getBufferLength());
1✔
178
        CharArray _buffer = new CharArray(4096);
1✔
179
        CharArray _body = new CharArray(4096);
1✔
180
        CharArray _head = new CharArray(512);
1✔
181
        CharArray _title = new CharArray(128);
1✔
182
        Map<String, String> _htmlProperties = null;
1✔
183
        Map<String, String> _metaProperties = new HashMap<>(6);
1✔
184
        Map<String, String> _sitemeshProperties = new HashMap<>(6);
1✔
185
        Map<String, String> _bodyProperties = null;
1✔
186

187
        CharArray _currentTaggedContent = new CharArray(1024);
1✔
188
        String _contentTagId = null;
1✔
189
        boolean tagged = false;
1✔
190

191
        boolean _frameSet = false;
1✔
192

193
        int _state = STATE_TEXT;
1✔
194
        int _tokenType = TOKEN_NONE;
1✔
195
        int _pushBack = 0;
1✔
196
        int _comment = 0;
1✔
197
        int _quote = 0;
1✔
198
        boolean hide = false;
1✔
199

200
        int state = TAG_STATE_NONE;
1✔
201
        int laststate = TAG_STATE_NONE;
1✔
202
        boolean doneTitle = false;
1✔
203

204
        // This tag object gets reused each iteration.
205
        Tag tagObject = new Tag();
1✔
206

207
        while (_tokenType != TOKEN_EOF) {
1✔
208
            if (tagged) {
1✔
209
                if (_tokenType == TOKEN_TAG || _tokenType == TOKEN_EMPTYTAG) {
1!
210
                    if (_buffer == null || _buffer.length() == 0) {
1!
211
                        _tokenType = TOKEN_NONE;
×
212
                        continue;
×
213
                    }
214

215
                    if (parseTag(tagObject, _buffer) == null) {
1!
216
                        continue;
×
217
                    }
218

219
                    // Note that the '/' survives the | 32 operation
220
                    if (_buffer.compareLowerSubstr("/content")) {
1✔
221
                        tagged = false;
1✔
222
                        if (_contentTagId != null) {
1!
223
                            state = TAG_STATE_NONE;
1✔
224
                            _sitemeshProperties.put(_contentTagId, _currentTaggedContent.toString());
1✔
225
                            _currentTaggedContent.setLength(0);
1✔
226
                            _contentTagId = null;
1✔
227
                        }
228
                    } else {
229
                        _currentTaggedContent.append('<').append(_buffer).append('>');
1✔
230
                    }
231
                } else if (_tokenType == TOKEN_COMMENT) {
1!
232
                    if (_buffer.length() > 0) {
×
233
                        _currentTaggedContent.append("<!--");
×
234
                        _currentTaggedContent.append(_buffer);
×
235
                        _currentTaggedContent.append("-->");
×
236
                    }
237
                } else if (_tokenType == TOKEN_CDATA) {
1!
238
                    if (_buffer.length() > 0) {
×
239
                        _currentTaggedContent.append("<![CDATA[");
×
240
                        _currentTaggedContent.append(_buffer);
×
241
                        _currentTaggedContent.append("]]>");
×
242
                    }
243
                } else if (_tokenType == TOKEN_SCRIPT) {
1!
244
                    if (_buffer.length() > 0) {
×
245
                        _currentTaggedContent.append('<');
×
246
                        _currentTaggedContent.append(_buffer);
×
247
                    }
248
                } else {
249
                    if (_buffer.length() > 0) {
1!
250
                        _currentTaggedContent.append(_buffer);
1✔
251
                    }
252
                }
253
            } else if (_tokenType == TOKEN_TAG || _tokenType == TOKEN_EMPTYTAG) {
1✔
254
                if (_buffer == null || _buffer.length() == 0) {
1!
255
                    _tokenType = TOKEN_NONE;
×
256
                    continue;
×
257
                }
258

259
                if (parseTag(tagObject, _buffer) == null) {
1!
260
                    _tokenType = TOKEN_TEXT;
×
261
                    continue;
×
262
                }
263

264
                int tagHash = _buffer.substrHashCode();
1✔
265

266
                if (state == TAG_STATE_XML || state == TAG_STATE_XMP) {
1!
267
                    writeTag(state, laststate, hide, _head, _buffer, _body);
×
268
                    if (state == TAG_STATE_XML && tagHash == SLASH_XML_HASH
×
269
                            || state == TAG_STATE_XMP && tagHash == SLASH_XMP_HASH) {
270
                        state = laststate;
×
271
                    }
272
                } else {
273
                    boolean doDefault = false;
1✔
274
                    switch (tagHash) {
1!
275
                        case HTML_HASH:
276
                            if (!_buffer.compareLowerSubstr("html")) { // skip any accidental hash collisions
1!
277
                                doDefault = true;
×
278
                                break;
×
279
                            }
280
                            state = TAG_STATE_HTML;
1✔
281
                            _htmlProperties = parseProperties(tagObject, _buffer).properties;
1✔
282
                            break;
1✔
283
                        case HEAD_HASH:
284
                            if (!_buffer.compareLowerSubstr("head")) { // skip any accidental hash collisions
1!
285
                                doDefault = true;
×
286
                                break;
×
287
                            }
288
                            state = TAG_STATE_HEAD;
1✔
289
                            break;
1✔
290
                        case XML_HASH:
291
                            if (!_buffer.compareLowerSubstr("xml")) { // skip any accidental hash collisions
×
292
                                doDefault = true;
×
293
                                break;
×
294
                            }
295
                            laststate = state;
×
296
                            writeTag(state, laststate, hide, _head, _buffer, _body);
×
297
                            state = TAG_STATE_XML;
×
298
                            break;
×
299
                        case XMP_HASH:
300
                            if (!_buffer.compareLowerSubstr("xmp")) { // skip any accidental hash collisions
×
301
                                doDefault = true;
×
302
                                break;
×
303
                            }
304
                            laststate = state;
×
305
                            writeTag(state, laststate, hide, _head, _buffer, _body);
×
306
                            state = TAG_STATE_XMP;
×
307
                            break;
×
308
                        case TITLE_HASH:
309
                            if (!_buffer.compareLowerSubstr("title")) { // skip any accidental hash collisions
1!
310
                                doDefault = true;
×
311
                                break;
×
312
                            }
313
                            if (doneTitle) {
1!
314
                                hide = true;
×
315
                            } else {
316
                                laststate = state;
1✔
317
                                state = TAG_STATE_TITLE;
1✔
318
                            }
319
                            break;
1✔
320
                        case SLASH_TITLE_HASH:
321
                            if (!_buffer.compareLowerSubstr("/title")) { // skip any accidental hash collisions
1!
322
                                doDefault = true;
×
323
                                break;
×
324
                            }
325
                            if (doneTitle) {
1!
326
                                hide = false;
×
327
                            } else {
328
                                doneTitle = true;
1✔
329
                                state = laststate;
1✔
330
                            }
331
                            break;
1✔
332
                        case PARAMETER_HASH:
333
                            if (!_buffer.compareLowerSubstr("parameter")) { // skip any accidental hash collisions
×
334
                                doDefault = true;
×
335
                                break;
×
336
                            }
337
                            parseProperties(tagObject, _buffer);
×
338
                            String name = (String) tagObject.properties.get("name");
×
339
                            String value = (String) tagObject.properties.get("value");
×
340

341
                            if (name != null && value != null) {
×
342
                                _sitemeshProperties.put(name, value);
×
343
                            }
344
                            break;
345
                        case META_HASH:
346
                            if (!_buffer.compareLowerSubstr("meta")) { // skip any accidental hash collisions
1!
347
                                doDefault = true;
×
348
                                break;
×
349
                            }
350
                            CharArray metaDestination = state == TAG_STATE_HEAD ? _head : _body;
1!
351
                            metaDestination.append('<');
1✔
352
                            metaDestination.append(_buffer);
1✔
353
                            metaDestination.append('>');
1✔
354
                            parseProperties(tagObject, _buffer);
1✔
355
                            name = (String) tagObject.properties.get("name");
1✔
356
                            value = (String) tagObject.properties.get("content");
1✔
357

358
                            if (name == null) {
1!
359
                                String httpEquiv = (String) tagObject.properties.get("http-equiv");
×
360

361
                                if (httpEquiv != null) {
×
362
                                    name = "http-equiv." + httpEquiv;
×
363
                                }
364
                            }
365

366
                            if (name != null && value != null) {
1!
367
                                _metaProperties.put(name, value);
1✔
368
                            }
369
                            break;
370
                        case SLASH_HEAD_HASH:
371
                            if (!_buffer.compareLowerSubstr("/head")) { // skip any accidental hash collisions
1!
372
                                doDefault = true;
×
373
                                break;
×
374
                            }
375
                            state = TAG_STATE_HTML;
1✔
376
                            break;
1✔
377
                        case FRAME_HASH:
378
                            if (!_buffer.compareLowerSubstr("frame")) { // skip any accidental hash collisions
×
379
                                doDefault = true;
×
380
                                break;
×
381
                            }
382
                            _frameSet = true;
×
383
                            break;
×
384
                        case FRAMESET_HASH:
385
                            if (!_buffer.compareLowerSubstr("frameset")) { // skip any accidental hash collisions
×
386
                                doDefault = true;
×
387
                                break;
×
388
                            }
389
                            _frameSet = true;
×
390
                            break;
×
391
                        case BODY_HASH:
392
                            if (!_buffer.compareLowerSubstr("body")) { // skip any accidental hash collisions
1!
393
                                doDefault = true;
×
394
                                break;
×
395
                            }
396
                            if (_tokenType == TOKEN_EMPTYTAG) {
1!
397
                                state = TAG_STATE_BODY;
×
398
                            }
399
                            _bodyProperties = parseProperties(tagObject, _buffer).properties;
1✔
400
                            break;
1✔
401
                        case CONTENT_HASH:
402
                            if (!_buffer.compareLowerSubstr("content")) { // skip any accidental hash collisions
1!
403
                                doDefault = true;
×
404
                                break;
×
405
                            }
406
                            state = TAG_STATE_NONE;
1✔
407
                            Map<String, String> props = parseProperties(tagObject, _buffer).properties;
1✔
408
                            if (props != null) {
1!
409
                                tagged = true;
1✔
410
                                _contentTagId = (String) props.get("tag");
1✔
411
                            }
412
                            break;
413
                        case SLASH_XMP_HASH:
414
                            if (!_buffer.compareLowerSubstr("/xmp")) { // skip any accidental hash collisions
×
415
                                doDefault = true;
×
416
                                break;
×
417
                            }
418
                            hide = false;
×
419
                            break;
×
420
                        case SLASH_BODY_HASH:
421
                            if (!_buffer.compareLowerSubstr("/body")) { // skip any accidental hash collisions
1!
422
                                doDefault = true;
×
423
                                break;
×
424
                            }
425
                            state = TAG_STATE_NONE;
1✔
426
                            hide = true;
1✔
427
                            break;
1✔
428
                        case SLASH_HTML_HASH:
429
                            if (!_buffer.compareLowerSubstr("/html")) { // skip any accidental hash collisions
1!
430
                                doDefault = true;
×
431
                                break;
×
432
                            }
433
                            state = TAG_STATE_NONE;
1✔
434
                            hide = true;
1✔
435
                            break;
1✔
436
                        default:
437
                            doDefault = true;
1✔
438
                    }
439
                    if (doDefault) {
1✔
440
                        writeTag(state, laststate, hide, _head, _buffer, _body);
1✔
441
                    }
442
                }
443
            } else if (!hide) {
1✔
444
                switch (_tokenType) {
1!
445
                    case TOKEN_TEXT:
446
                        if (state == TAG_STATE_TITLE) {
1✔
447
                            _title.append(_buffer);
1✔
448
                        } else if (shouldWriteToHead(state, laststate)) {
1✔
449
                            _head.append(_buffer);
1✔
450
                        } else {
451
                            _body.append(_buffer);
1✔
452
                        }
453
                        break;
1✔
454
                    case TOKEN_COMMENT: {
455
                        final CharArray commentDestination = shouldWriteToHead(state, laststate) ? _head : _body;
×
456
                        commentDestination.append("<!--");
×
457
                        commentDestination.append(_buffer);
×
458
                        commentDestination.append("-->");
×
459
                        break;
×
460
                    }
461
                    case TOKEN_CDATA: {
462
                        final CharArray commentDestination = state == TAG_STATE_HEAD ? _head : _body;
×
463
                        commentDestination.append("<![CDATA[");
×
464
                        commentDestination.append(_buffer);
×
465
                        commentDestination.append("]]>");
×
466
                        break;
×
467
                    }
468
                    case TOKEN_SCRIPT: {
469
                        final CharArray commentDestination = state == TAG_STATE_HEAD ? _head : _body;
×
470
                        commentDestination.append('<');
×
471
                        commentDestination.append(_buffer);
×
472
                        break;
×
473
                    }
474
                    default:
475
                        break;
476
                }
477
            }
478
            _buffer.setLength(0);
1✔
479

480
            start: while (true) {
481
                int c;
482

483
                if (_pushBack != 0) {
1!
484
                    c = _pushBack;
×
485
                    _pushBack = 0;
×
486
                } else {
487
                    try {
488
                        c = reader.read();
1✔
489
                    } catch (IOException e) {
×
490
                        _tokenType = TOKEN_EOF;
×
491
                        break start;
×
492
                    }
1✔
493
                }
494

495
                if (c < 0) {
1✔
496
                    int tmpstate = _state;
1✔
497
                    _state = STATE_EOF;
1✔
498

499
                    if (_buffer.length() > 0 && tmpstate == STATE_TEXT) {
1!
500
                        _tokenType = TOKEN_TEXT;
1✔
501
                        break start;
1✔
502
                    }
503
                    _tokenType = TOKEN_EOF;
1✔
504
                    break start;
1✔
505
                }
506

507
                switch (_state) {
1!
508
                    case STATE_TAG: {
509
                        int buflen = _buffer.length();
1✔
510

511
                        if (c == '>') {
1✔
512
                            if (_buffer.length() > 1 && _buffer.charAt(_buffer.length() - 1) == '/') {
1✔
513
                                _tokenType = TOKEN_EMPTYTAG;
1✔
514
                            } else {
515
                                _tokenType = TOKEN_TAG;
1✔
516
                            }
517
                            _state = STATE_TEXT;
1✔
518
                            break start;
1✔
519
                        }
520
                        if (c == '/') {
1✔
521
                            _buffer.append('/');
1✔
522
                        } else if (c == '<' && buflen == 0) {
1!
523
                            _buffer.append("<<");
×
524
                            _state = STATE_TEXT;
×
525
                        } else if (c == '-' && buflen == 2 && _buffer.charAt(1) == '-' && _buffer.charAt(0) == '!') {
1!
526
                            _buffer.setLength(0);
×
527
                            _state = STATE_COMMENT;
×
528
                        } else if (c == '[' && buflen == 7 && _buffer.charAt(0) == '!' && _buffer.charAt(1) == '['
1!
529
                                && _buffer.compareLower("cdata", 2)) {
×
530
                            _buffer.setLength(0);
×
531
                            _state = STATE_CDATA;
×
532
                        } else if ((c == 'e' || c == 'E') && buflen == 7 && _buffer.charAt(0) == '!'
1!
533
                                && _buffer.compareLower("doctyp", 1)) {
×
534
                            _buffer.append((char) c);
×
535
                            _state = STATE_DOCTYPE;
×
536
                        } else if ((c == 'T' || c == 't') && buflen == 5 && _buffer.compareLower("scrip", 0)) {
1!
537
                            _buffer.append((char) c);
×
538
                            _state = STATE_SCRIPT;
×
539
                        }
540

541
                        else if (c == '"' || c == '\'') {
1!
542
                            _quote = c;
1✔
543
                            _buffer.append((char) c);
1✔
544
                            _state = STATE_TAG_QUOTE;
1✔
545
                        } else {
546
                            _buffer.append((char) c);
1✔
547
                        }
548
                    }
549
                        break;
1✔
550

551
                    case STATE_TEXT: {
552
                        if (c == '<') {
1✔
553
                            _state = STATE_TAG;
1✔
554
                            if (_buffer.length() > 0) {
1!
555
                                _tokenType = TOKEN_TEXT;
1✔
556
                                break start;
1✔
557
                            }
558
                        } else {
559
                            _buffer.append((char) c);
1✔
560
                        }
561
                    }
562
                        break;
1✔
563

564
                    case STATE_TAG_QUOTE: {
565
                        if (c == '>') {
1!
566
                            _pushBack = c;
×
567
                            _state = STATE_TAG;
×
568
                        } else {
569
                            _buffer.append((char) c);
1✔
570
                            if (c == _quote) {
1✔
571
                                _state = STATE_TAG;
1✔
572
                            }
573
                        }
574
                    }
575
                        break;
576

577
                    case STATE_COMMENT: {
578
                        if (c == '>' && _comment >= 2) {
×
579
                            _buffer.setLength(_buffer.length() - 2);
×
580
                            _comment = 0;
×
581
                            _state = STATE_TEXT;
×
582
                            _tokenType = TOKEN_COMMENT;
×
583
                            break start;
×
584
                        }
585
                        if (c == '-') {
×
586
                            _comment++;
×
587
                        } else {
588
                            _comment = 0;
×
589
                        }
590

591
                        _buffer.append((char) c);
×
592
                    }
593
                        break;
×
594

595
                    case STATE_CDATA: {
596
                        if (c == '>' && _comment >= 2) {
×
597
                            _buffer.setLength(_buffer.length() - 2);
×
598
                            _comment = 0;
×
599
                            _state = STATE_TEXT;
×
600
                            _tokenType = TOKEN_CDATA;
×
601
                            break start;
×
602
                        }
603
                        if (c == ']') {
×
604
                            _comment++;
×
605
                        } else {
606
                            _comment = 0;
×
607
                        }
608

609
                        _buffer.append((char) c);
×
610
                    }
611
                        break;
×
612

613
                    case STATE_SCRIPT: {
614
                        _buffer.append((char) c);
×
615
                        if (c == '<') {
×
616
                            _comment = 0;
×
617
                        } else if (c == '/' && _comment == 0 || (c == 's' || c == 'S') && _comment == 1
×
618
                                || (c == 'c' || c == 'C') && _comment == 2 || (c == 'r' || c == 'R') && _comment == 3
619
                                || (c == 'i' || c == 'I') && _comment == 4 || (c == 'p' || c == 'P') && _comment == 5
620
                                || (c == 't' || c == 'T') && _comment == 6) {
621
                            _comment++;
×
622
                        } else if (c == '>' && _comment >= 7) {
×
623
                            _comment = 0;
×
624
                            _state = STATE_TEXT;
×
625
                            _tokenType = TOKEN_SCRIPT;
×
626
                            break start;
×
627
                        }
628
                    }
629
                        break;
630

631
                    case STATE_DOCTYPE: {
632
                        _buffer.append((char) c);
×
633
                        if (c == '>') {
×
634
                            _state = STATE_TEXT;
×
635
                            _tokenType = TOKEN_DOCTYPE;
×
636
                            break start;
×
637
                        }
638
                        _comment = 0;
×
639
                    }
640
                        break;
641
                }
642
            }
1✔
643
        }
644

645
        // Help the GC
646
        _currentTaggedContent = null;
1✔
647
        _buffer = null;
1✔
648

649
        return new FastPage(buffer, _sitemeshProperties, _htmlProperties, _metaProperties, _bodyProperties,
1✔
650
                _title.toString().trim(), _head.toString().trim(), _body.toString().trim(), _frameSet);
1✔
651
    }
652

653
    /**
654
     * Write tag.
655
     *
656
     * @param state
657
     *            the state
658
     * @param laststate
659
     *            the laststate
660
     * @param hide
661
     *            the hide
662
     * @param _head
663
     *            the head
664
     * @param _buffer
665
     *            the buffer
666
     * @param _body
667
     *            the body
668
     */
669
    private static void writeTag(int state, int laststate, boolean hide, CharArray _head, CharArray _buffer,
670
            CharArray _body) {
671
        if (!hide) {
1!
672
            if (shouldWriteToHead(state, laststate)) {
1✔
673
                _head.append('<').append(_buffer).append('>');
1✔
674
            } else {
675
                _body.append('<').append(_buffer).append('>');
1✔
676
            }
677
        }
678
    }
1✔
679

680
    /**
681
     * Should write to head.
682
     *
683
     * @param state
684
     *            the state
685
     * @param laststate
686
     *            the laststate
687
     *
688
     * @return true, if successful
689
     */
690
    private static boolean shouldWriteToHead(int state, int laststate) {
691
        return state == TAG_STATE_HEAD
1!
692
                || laststate == TAG_STATE_HEAD && (state == TAG_STATE_XML || state == TAG_STATE_XMP);
693
    }
694

695
    /**
696
     * Populates a {@link Tag} object using data from the supplied {@link CharArray}. The supplied tag parameter is
697
     * reset and reused - this avoids excess object creation which helps performance.
698
     *
699
     * @param tag
700
     *            the tag
701
     * @param buf
702
     *            the buf
703
     *
704
     * @return the same tag instance that was passed in, except it will be populated with a new <code>name</code> value
705
     *         (and the corresponding <code>nameEndIdx</code> value). However if the tag contained nothing but
706
     *         whitespace, this method will return <code>null</code>.
707
     */
708
    private Tag parseTag(Tag tag, CharArray buf) {
709
        int len = buf.length();
1✔
710
        int idx = 0;
1✔
711
        int begin;
712

713
        // Skip over any leading whitespace in the tag
714
        while (idx < len && Character.isWhitespace(buf.charAt(idx))) {
1!
715
            idx++;
×
716
        }
717

718
        if (idx == len) {
1!
719
            return null;
×
720
        }
721

722
        // Find out where the non-whitespace characters end. This will give us the tag name.
723
        begin = idx;
1✔
724
        while (idx < len && !Character.isWhitespace(buf.charAt(idx))) {
1✔
725
            idx++;
1✔
726
        }
727

728
        // Mark the tag name as a substring within the buffer. This allows us to perform
729
        // a substring comparison against it at a later date
730
        buf.setSubstr(begin, buf.charAt(idx - 1) == '/' ? idx - 1 : idx);
1!
731

732
        // Remember where the name finishes so we can pull out the properties later if need be
733
        tag.nameEndIdx = idx;
1✔
734

735
        return tag;
1✔
736
    }
737

738
    /**
739
     * This is called when we need to extract the properties for the tag from the tag's HTML. We only call this when
740
     * necessary since it has quite a lot of overhead.
741
     *
742
     * @param tag
743
     *            the tag that is currently being processed. This should be the tag that was returned as a result of a
744
     *            call to {@link #parseTag(FastPageParser.Tag, CharArray)} (ie, it has the <code>name</code> and
745
     *            <code>nameEndIdx</code> fields set correctly for the tag in question. The <code>properties</code>
746
     *            field can be in an undefined state - it will get replaced regardless).
747
     * @param buffer
748
     *            a <code>CharArray</code> containing the entire tag that is being parsed.
749
     *
750
     * @return the same tag instance that was passed in, only it will now be populated with any properties that were
751
     *         specified in the tag's HTML.
752
     */
753
    private static Tag parseProperties(Tag tag, CharArray buffer) {
754
        int len = buffer.length();
1✔
755
        int idx = tag.nameEndIdx;
1✔
756

757
        // Start with an empty hashmap. A new HashMap is lazy-created if we happen to find any properties
758
        tag.properties = Collections.emptyMap();
1✔
759
        int begin;
760
        while (idx < len) {
1✔
761
            // Skip forward to the next non-whitespace character
762
            while (idx < len && Character.isWhitespace(buffer.charAt(idx))) {
1!
763
                idx++;
1✔
764
            }
765

766
            if (idx == len) {
1!
767
                continue;
×
768
            }
769

770
            begin = idx;
1✔
771
            if (buffer.charAt(idx) == '"') {
1!
772
                idx++;
×
773
                while (idx < len && buffer.charAt(idx) != '"') {
×
774
                    idx++;
×
775
                }
776
                if (idx == len) {
×
777
                    continue;
×
778
                }
779
                idx++;
×
780
            } else if (buffer.charAt(idx) == '\'') {
1!
781
                idx++;
×
782
                while (idx < len && buffer.charAt(idx) != '\'') {
×
783
                    idx++;
×
784
                }
785
                if (idx == len) {
×
786
                    continue;
×
787
                }
788
                idx++;
×
789
            } else {
790
                while (idx < len && !Character.isWhitespace(buffer.charAt(idx)) && buffer.charAt(idx) != '=') {
1!
791
                    idx++;
1✔
792
                }
793
            }
794

795
            // Mark the substring. This is the attribute name
796
            buffer.setSubstr(begin, idx);
1✔
797

798
            if (idx < len && Character.isWhitespace(buffer.charAt(idx))) {
1!
799
                while (idx < len && Character.isWhitespace(buffer.charAt(idx))) {
×
800
                    idx++;
×
801
                }
802
            }
803

804
            if (idx == len || buffer.charAt(idx) != '=') {
1!
805
                continue;
×
806
            }
807

808
            idx++;
1✔
809

810
            if (idx == len) {
1!
811
                continue;
×
812
            }
813

814
            while (idx < len && (buffer.charAt(idx) == '\n' || buffer.charAt(idx) == '\r')) {
1!
815
                idx++;
×
816
            }
817

818
            if (buffer.charAt(idx) == ' ') {
1!
819
                while (idx < len && Character.isWhitespace(buffer.charAt(idx))) {
×
820
                    idx++;
×
821
                }
822
                if (idx == len || buffer.charAt(idx) != '"' && buffer.charAt(idx) != '"') {
×
823
                    continue;
×
824
                }
825
            }
826

827
            begin = idx;
1✔
828
            int end;
829
            if (buffer.charAt(idx) == '"') {
1!
830
                idx++;
1✔
831
                begin = idx;
1✔
832
                while (idx < len && buffer.charAt(idx) != '"') {
1!
833
                    idx++;
1✔
834
                }
835
                if (idx == len) {
1!
836
                    continue;
×
837
                }
838
                end = idx;
1✔
839
                idx++;
1✔
840
            } else if (buffer.charAt(idx) == '\'') {
×
841
                idx++;
×
842
                begin = idx;
×
843
                while (idx < len && buffer.charAt(idx) != '\'') {
×
844
                    idx++;
×
845
                }
846
                if (idx == len) {
×
847
                    continue;
×
848
                }
849
                end = idx;
×
850
                idx++;
×
851
            } else {
852
                while (idx < len && !Character.isWhitespace(buffer.charAt(idx))) {
×
853
                    idx++;
×
854
                }
855
                end = idx;
×
856
            }
857
            // Extract the name and value as String objects and add them to the property map
858
            String name = buffer.getLowerSubstr();
1✔
859
            String value = buffer.substring(begin, end);
1✔
860

861
            tag.addProperty(name, value);
1✔
862
        }
1✔
863
        return tag;
1✔
864
    }
865

866
    /**
867
     * The Class Tag.
868
     */
869
    private static class Tag {
1✔
870
        /**
871
         * The name end idx.
872
         * <p>
873
         * The index where the name string ends. This is used as the starting offet if we need to continue processing to
874
         * find the tag's properties.
875
         */
876
        public int nameEndIdx = 0;
1✔
877

878
        /**
879
         * The properties.
880
         * <p>
881
         * This holds a map of the various properties for a particular tag. This map is only populated when required -
882
         * normally it will remain empty.
883
         */
884
        public Map<String, String> properties = Collections.emptyMap();
1✔
885

886
        /**
887
         * Adds a name/value property pair to this tag. Each property that is added represents a property that was
888
         * parsed from the tag's HTML.
889
         *
890
         * @param name
891
         *            the name
892
         * @param value
893
         *            the value
894
         */
895
        public void addProperty(String name, String value) {
896
            if (properties.isEmpty()) {
1✔
897
                properties = new HashMap<>(8);
1✔
898
            }
899
            properties.put(name, value);
1✔
900
        }
1✔
901
    }
902
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc