• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

xemlock / htmlpurifier-html5 / 21836314048

09 Feb 2026 06:24PM UTC coverage: 98.75% (-0.5%) from 99.277%
21836314048

push

github

xemlock
Fix catastrophic backtracking in Core.AggressivelyFixLt

2 of 3 new or added lines in 1 file covered. (66.67%)

3 existing lines in 1 file now uncovered.

1501 of 1520 relevant lines covered (98.75%)

3547.56 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

61.9
/library/HTMLPurifier/Lexer/HTML5.php
1
<?php
2

3
/**
 * Experimental HTML5-compliant parser using masterminds/html5 library.
 *
 * The input is normalized, optionally "armored" against stray '<' characters,
 * wrapped into a full document and parsed by \Masterminds\HTML5. The resulting
 * <body> element is then converted to HTMLPurifier tokens.
 */
class HTMLPurifier_Lexer_HTML5 extends HTMLPurifier_Lexer_DOMLex
{
    /**
     * Ensures the masterminds/html5 library is loadable before the lexer is used.
     *
     * @throws HTMLPurifier_Exception When the \Masterminds\HTML5 class is not available
     * @codeCoverageIgnore
     */
    public function __construct()
    {
        if (!class_exists('\Masterminds\HTML5')) {
            throw new HTMLPurifier_Exception('Cannot instantiate HTML5 lexer. \Masterminds\HTML5 class is not available');
        }
        parent::__construct();
    }

    /**
     * Transforms an HTML string into tokens.
     *
     * @param  string                $html
     * @param  HTMLPurifier_Config   $config
     * @param  HTMLPurifier_Context  $context
     * @return HTMLPurifier_Token[]
     */
    public function tokenizeHTML($html, $config, $context)
    {
        $html = $this->normalize($html, $config, $context);
        $html = $this->armor($html, $config);

        // masterminds/html5 requires <html>, <head> and <body> tags
        $html = $this->wrapHTML($html, $config, $context, false);

        // Parse the document. $doc is a DOMDocument.
        $html5 = new \Masterminds\HTML5(array('disable_html_ns' => true));
        $doc = $html5->loadHTML($html);

        $body = $doc->getElementsByTagName('html')->item(0)  // <html>
                    ->getElementsByTagName('body')->item(0); // <body>

        // NOTE(review): tokenizeDOM() is inherited; it is expected to append
        // the tokens for $body's subtree to $tokens.
        $tokens = array();
        $this->tokenizeDOM($body, $tokens, $config);

        return $tokens;
    }

    /**
     * Attempt to armor stray angled brackets that cannot possibly
     * form tags and thus are probably being used as emoticons
     *
     * Does nothing unless Core.AggressivelyFixLt is enabled. When the parent
     * class provides aggressivelyFixLt(), that implementation is used; otherwise
     * a local fallback is applied. The fallback is best-effort: if any PCRE
     * call fails (preg_* returns null, e.g. when a backtrack limit is hit),
     * the input is returned unchanged instead of being silently replaced by
     * an empty string.
     *
     * @param  string               $html
     * @param  HTMLPurifier_Config  $config
     * @return string
     */
    protected function armor($html, HTMLPurifier_Config $config)
    {
        if (!$config->get('Core.AggressivelyFixLt')) {
            return $html;
        }

        // Since HTMLPurifier 4.19.0, see: https://github.com/ezyang/htmlpurifier/pull/440
        if (method_exists(get_parent_class($this), 'aggressivelyFixLt')) {
            return $this->aggressivelyFixLt($html);
        }

        // Kept so that a failure at any stage can fall back to untouched input,
        // never to a half-armored string.
        $original = $html;

        $char = '[^a-z!\/]';
        $comment = "/<!--([^>]*)(-->|\z)/is";

        // Protect entities inside comments so that the undo step below can
        // restore comment contents.
        $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
        if ($html === null) {
            return $original;
        }

        // Each match consumes the character following '<', so directly adjacent
        // '<' characters need several passes; repeat until nothing changes.
        do {
            $old = $html;
            $html = preg_replace("/<($char)/i", '&lt;\\1', $html);
            if ($html === null) {
                return $original;
            }
        } while ($html !== $old);

        $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments

        return $html === null ? $original : $html;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc