• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

heimrichhannot / contao-utils-bundle / 6004356604

28 Aug 2023 08:01PM UTC coverage: 22.181% (-0.4%) from 22.549%
6004356604

push

github

koertho
removed dependency on request bundle

93 of 93 new or added lines in 2 files covered. (100.0%)

1196 of 5392 relevant lines covered (22.18%)

1.57 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/Request/RequestCleaner.php
1
<?php
2

3
namespace HeimrichHannot\UtilsBundle\Request;
4

5
use Contao\Input;
6
use Contao\StringUtil;
7
use Contao\Validator;
8
use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
9
use Wa72\HtmlPageDom\HtmlPageCrawler;
10

11
/**
 * Sanitizes request values: XSS cleaning, tag tidying, special character
 * encoding and insert tag encoding.
 *
 * @internal
 */
class RequestCleaner
{
    /**
     * Id of the temporary wrapper element that tidy() puts around HTML fragments
     * so they can be parsed; it is removed again before the value is returned.
     */
    private const TIDY_WRAPPER_ID = 'tidyWrapperx123x123xawec3';

    /**
     * XSS clean, decodeEntities, tidy/strip tags, encode special characters and encode inserttags and return safe, cleaned value(s).
     *
     * Arrays are cleaned recursively, null and UUID values are returned untouched.
     *
     * @param mixed $value            The input value
     * @param bool  $decodeEntities   If true, all entities will be decoded
     * @param bool  $encodeInsertTags If true, encode the opening and closing delimiters of insert tags
     * @param bool  $tidy             If true, varValue is tidied up
     * @param bool  $strictMode       If true, the xss cleaner removes also JavaScript event handlers
     *
     * @return mixed The cleaned value
     */
    public function clean($value, bool $decodeEntities = false, bool $encodeInsertTags = true, bool $tidy = true, bool $strictMode = true)
    {
        // do not clean, otherwise empty string will be returned, not null
        if (null === $value) {
            return $value;
        }

        if (\is_array($value)) {
            foreach ($value as $i => $childValue) {
                $value[$i] = $this->clean($childValue, $decodeEntities, $encodeInsertTags, $tidy, $strictMode);
            }

            return $value;
        }

        // NOTE(review): tidy() is deliberately called with its defaults here (no allowed tags,
        // $decodeEntities = false), exactly as before. $decodeEntities therefore only takes
        // effect when $tidy is false - confirm whether that is intended.
        return $this->cleanScalar($value, $decodeEntities, $encodeInsertTags, $tidy, $strictMode, '', false);
    }

    /**
     * XSS clean, decodeEntities, tidy/strip tags, encode special characters and encode inserttags and return safe, cleaned value(s).
     *
     * Same as clean(), but tidy() receives the list of allowed tags and the $decodeEntities flag.
     *
     * @param mixed  $value            The input value
     * @param bool   $decodeEntities   If true, all entities will be decoded
     * @param bool   $encodeInsertTags If true, encode the opening and closing delimiters of insert tags
     * @param string $allowedTags      List of allowed html tags
     * @param bool   $tidy             If true, varValue is tidied up
     * @param bool   $strictMode       If true, the xss cleaner removes also JavaScript event handlers
     *
     * @return mixed The cleaned value
     */
    public function cleanHtml($value, bool $decodeEntities = false, bool $encodeInsertTags = true, string $allowedTags = '', bool $tidy = true, bool $strictMode = true)
    {
        // do not clean, otherwise empty string will be returned, not null
        if (null === $value) {
            return $value;
        }

        if (\is_array($value)) {
            foreach ($value as $i => $childValue) {
                $value[$i] = $this->cleanHtml($childValue, $decodeEntities, $encodeInsertTags, $allowedTags, $tidy, $strictMode);
            }

            return $value;
        }

        return $this->cleanScalar($value, $decodeEntities, $encodeInsertTags, $tidy, $strictMode, $allowedTags, $decodeEntities);
    }

    /**
     * Clean a value and try to prevent XSS attacks.
     *
     * @param mixed $varValue   A string or array
     * @param bool  $strictMode If true, the function removes also JavaScript event handlers
     *
     * @return mixed The cleaned string or array
     */
    public function xssClean($varValue, bool $strictMode = false)
    {
        if (\is_array($varValue)) {
            foreach ($varValue as $key => $value) {
                $varValue[$key] = $this->xssClean($value, $strictMode);
            }

            return $varValue;
        }

        // do not xss clean binary uuids
        if (Validator::isBinaryUuid($varValue)) {
            return $varValue;
        }

        // Fix issue StringUtil::decodeEntities() returning empty string when value is 0 in some contao 4.9 versions
        if ('0' !== $varValue && 0 !== $varValue) {
            $varValue = StringUtil::decodeEntities($varValue);
        }

        // make sure every "&#..." sequence is terminated by exactly one semicolon
        $varValue = preg_replace('/(&#[A-Za-z0-9]+);?/i', '$1;', $varValue);

        // fix: "><script>alert('xss')</script> or '></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>
        $varValue = preg_replace('/(?<!\w)(?>["|\']>)+(<[^\/^>]+>.*)/', '$1', $varValue);

        return Input::xssClean($varValue, $strictMode);
    }

    /**
     * Tidy a value.
     *
     * @param string $varValue       Input value
     * @param string $allowedTags    Allowed tags as string `<p><span>`
     * @param bool   $decodeEntities If true, all entities will be decoded
     *
     * @return string The tidied string
     */
    public function tidy($varValue, string $allowedTags = '', bool $decodeEntities = false): string
    {
        if (!$varValue) {
            // Nothing to tidy. Always hand back a string: returning null or an empty array
            // as-is would violate the declared return type and raise a TypeError.
            return \is_scalar($varValue) ? (string) $varValue : '';
        }

        // do not tidy non-xss critical characters for performance
        if (!preg_match('#"|\'|<|>|\(|\)#', StringUtil::decodeEntities($varValue))) {
            return (string) $varValue;
        }

        // remove illegal white spaces after closing tag slash <br / >
        $varValue = preg_replace('@\/(\s+)>@', '/>', $varValue);

        // Encode opening tag arrow brackets
        $varValue = preg_replace_callback('/<(?(?=!--)!--[\s\S]*--|(?(?=\?)\?[\s\S]*\?|(?(?=\/)\/[^.\-\d][^\/\]\'"[!#$%&()*+,;<=>?@^`{|}~ ]*|[^.\-\d][^\/\]\'"[!#$%&()*+,;<=>?@^`{|}~ ]*(?:\s[^.\-\d][^\/\]\'"[!#$%&()*+,;<=>?@^`{|}~ ]*(?:=(?:"[^"]*"|\'[^\']*\'|[^\'"<\s]*))?)*)\s?\/?))>/', function ($matches) {
            return substr_replace($matches[0], '&lt;', 0, 1);
        }, $varValue);

        // Encode less than signs that are no tags with [lt]
        $varValue = str_replace('<', '[lt]', $varValue);

        // After we saved less than signs with [lt] revert &lt; sign to <
        $varValue = StringUtil::decodeEntities($varValue);

        // Restore HTML comments
        $varValue = str_replace(['&lt;!--', '&lt;!['], ['<!--', '<!['], $varValue);

        // Recheck for encoded null bytes; loop because removing one occurrence can form a new one
        while (false !== strpos($varValue, '\\0')) {
            $varValue = str_replace('\\0', '', $varValue);
        }

        $objCrawler = new HtmlPageCrawler($varValue);

        // fragments are wrapped in a temporary element so they can be processed like a document
        if (!$objCrawler->isHtmlDocument()) {
            $objCrawler = new HtmlPageCrawler('<div id="'.self::TIDY_WRAPPER_ID.'">'.$varValue.'</div>');
        }

        // turn "<p><span>" into ['p', 'span'] (array_filter drops the empty leading chunk)
        $arrAllowedTags = explode('<', str_replace('>', '', $allowedTags));
        $arrAllowedTags = array_filter($arrAllowedTags);

        try {
            if (!empty($arrAllowedTags)) {
                $objCrawler->filter('*')->each(function ($node, $i) use ($arrAllowedTags) {
                    /** @var HtmlPageCrawler $node */

                    // skip wrapper
                    if (self::TIDY_WRAPPER_ID === $node->getAttribute('id')) {
                        return $node;
                    }

                    if (!\in_array($node->getNode(0)->tagName, $arrAllowedTags, true)) {
                        // mark the brackets of unwanted tags with placeholders; they are
                        // turned into numeric entities at the end of tidy()
                        $strHTML = $node->saveHTML();
                        $strHTML = str_replace(['<', '>'], ['[[xlt]]', '[[xgt]]'], $strHTML);

                        // remove unwanted tags and return the element text
                        return $node->replaceWith($strHTML);
                    }

                    return $node;
                });
            }

            // unwrap the temporary wrapper and set value to its innerHTML
            if (!$objCrawler->isHtmlDocument()) {
                $varValue = $objCrawler->filter('div#'.self::TIDY_WRAPPER_ID)->getInnerHtml();
            } else {
                $varValue = $objCrawler->saveHTML();
            }

            // HTML documents or fragments, Crawler first converts all non-ASCII characters to entities (see: https://github.com/wasinger/htmlpagedom/issues/5)
            $varValue = StringUtil::decodeEntities($varValue);

            // strip every run of [nbsp] placeholders
            $varValue = preg_replace('@(\[nbsp\])+@', '', $varValue);
        } catch (SyntaxErrorException $e) {
            // Best effort: if a selector cannot be parsed, continue with the value as processed so far.
        }

        // Turn the bracket placeholders (e.g. the [lt] inserted above) back into entities.
        // This class does not define restoreBasicEntities() itself, so the Contao helper is used.
        // The $decodeEntities flag is honoured by the encodeSpecialChars step below.
        $varValue = StringUtil::restoreBasicEntities($varValue);

        if (!$decodeEntities) {
            $varValue = Input::encodeSpecialChars($varValue);
        }

        // encode unwanted tag opening and closing brackets
        $arrSearch = ['[[xlt]]', '[[xgt]]'];
        $arrReplace = ['&#60;', '&#62;'];

        return str_replace($arrSearch, $arrReplace, $varValue);
    }

    /**
     * Shared cleaning pipeline of clean() and cleanHtml() for a single, non-null, non-array value.
     *
     * @param mixed  $value               The input value
     * @param bool   $decodeEntities      If false and $tidy is false, special characters are encoded
     * @param bool   $encodeInsertTags    If true, encode the opening and closing delimiters of insert tags
     * @param bool   $tidy                If true, the value is passed through tidy()
     * @param bool   $strictMode          Passed on to xssClean()
     * @param string $allowedTags         Allowed tags passed on to tidy()
     * @param bool   $tidyDecodesEntities The $decodeEntities argument passed on to tidy()
     *
     * @return mixed The cleaned value
     */
    private function cleanScalar($value, bool $decodeEntities, bool $encodeInsertTags, bool $tidy, bool $strictMode, string $allowedTags, bool $tidyDecodesEntities)
    {
        // do not handle binary uuid
        if (Validator::isUuid($value)) {
            return $value;
        }

        $value = $this->xssClean($value, $strictMode);

        if ($tidy) {
            // tidy() encodes special characters itself; encoding again would encode non allowed tags twice
            $value = $this->tidy($value, $allowedTags, $tidyDecodesEntities);
        } else {
            // decodeEntities for tidy is more complex, because non allowed tags should be displayed as readable text, not as html entity
            $value = Input::decodeEntities($value);

            if (!$decodeEntities) {
                $value = Input::encodeSpecialChars($value);
            }
        }

        if ($encodeInsertTags) {
            $value = Input::encodeInsertTags($value);
        }

        return $value;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc