• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

xemlock / htmlpurifier-html5 / 21835817105

09 Feb 2026 06:08PM UTC coverage: 99.277% (+0.001%) from 99.276%
21835817105

push

github

xemlock
Fix catastrophic backtracking in Core.AggressivelyFixLt

2 of 2 new or added lines in 1 file covered. (100.0%)

10 existing lines in 3 files now uncovered.

1510 of 1521 relevant lines covered (99.28%)

3880.54 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

100.0
/library/HTMLPurifier/Lexer/HTML5.php
1
<?php
2

3
/**
 * Experimental HTML5-compliant parser using masterminds/html5 library.
 *
 * Parses the input with \Masterminds\HTML5 and hands the resulting <body>
 * node to tokenizeDOM() to produce the token stream.
 */
class HTMLPurifier_Lexer_HTML5 extends HTMLPurifier_Lexer_DOMLex
{
    /**
     * Refuses to construct the lexer when the masterminds/html5 library
     * is not loadable, then defers to the parent constructor.
     *
     * @throws HTMLPurifier_Exception
     * @codeCoverageIgnore
     */
    public function __construct()
    {
        if (!class_exists('\Masterminds\HTML5')) {
            throw new HTMLPurifier_Exception('Cannot instantiate HTML5 lexer. \Masterminds\HTML5 class is not available');
        }
        parent::__construct();
    }

    /**
     * Transforms an HTML string into tokens.
     *
     * @param  string                $html
     * @param  HTMLPurifier_Config   $config
     * @param  HTMLPurifier_Context  $context
     * @return HTMLPurifier_Token[]
     */
    public function tokenizeHTML($html, $config, $context)
    {
        $html = $this->normalize($html, $config, $context);
        $html = $this->armor($html, $config);

        // masterminds/html5 requires <html>, <head> and <body> tags
        $html = $this->wrapHTML($html, $config, $context, false);

        // Parse the document. $doc is a DOMDocument.
        $html5 = new \Masterminds\HTML5(array('disable_html_ns' => true));
        $doc = $html5->loadHTML($html);

        $body = $doc->getElementsByTagName('html')->item(0)  // <html>
                    ->getElementsByTagName('body')->item(0); // <body>

        $tokens = array();
        $this->tokenizeDOM($body, $tokens, $config);

        return $tokens;
    }

    /**
     * Attempt to armor stray angled brackets that cannot possibly
     * form tags and thus are probably being used as emoticons.
     *
     * Does nothing unless Core.AggressivelyFixLt is enabled. When the parent
     * class provides aggressivelyFixLt(), that implementation is used;
     * otherwise a legacy fallback runs here. The fallback is all-or-nothing:
     * if any PCRE call fails (preg_* returns null, e.g. when
     * pcre.backtrack_limit is exceeded), the input is returned unmodified
     * instead of being replaced by null or an empty string.
     *
     * @param  string               $html
     * @param  HTMLPurifier_Config  $config
     * @return string
     */
    protected function armor($html, HTMLPurifier_Config $config)
    {
        if (!$config->get('Core.AggressivelyFixLt')) {
            return $html;
        }

        // Since HTMLPurifier 4.19.0, see: https://github.com/ezyang/htmlpurifier/pull/440
        if (method_exists(get_parent_class($this), 'aggressivelyFixLt')) {
            return $this->aggressivelyFixLt($html);
        }

        // Matches a comment up to its terminator, or up to the end of input
        // when the comment is never closed.
        $comment = '/<!--(.*?)(-->|\z)/is';

        // Protect comment contents before escaping brackets. The callback is
        // not defined in this class (presumably inherited from the parent lexer).
        $armored = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
        if ($armored === null) {
            return $html;
        }

        // Escape every "<" whose next byte cannot start a tag, a closing tag
        // or a declaration. The lookahead leaves the following byte unconsumed,
        // so runs such as "<<<" are handled in a single pass; this yields the
        // same result as repeatedly replacing "<" plus one consumed character
        // until the string stops changing.
        $escaped = preg_replace('/<(?=[^a-z!\/])/i', '&lt;', $armored);
        if ($escaped === null) {
            return $html;
        }

        // fix comments
        $restored = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $escaped);
        if ($restored === null) {
            return $html;
        }

        return $restored;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc