• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

heimrichhannot / contao-utils-bundle / 6110446646

07 Sep 2023 01:32PM UTC coverage: 22.152% (-0.02%) from 22.169%
6110446646

push

github

koertho
fixed undefined method exception in request cleaner

4 of 4 new or added lines in 1 file covered. (100.0%)

1196 of 5399 relevant lines covered (22.15%)

1.57 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/Request/RequestCleaner.php
1
<?php
2

3
/*
4
 * Copyright (c) 2023 Heimrich & Hannot GmbH
5
 *
6
 * @license LGPL-3.0-or-later
7
 */
8

9
namespace HeimrichHannot\UtilsBundle\Request;
10

11
use Contao\Input;
12
use Contao\StringUtil;
13
use Contao\Validator;
14
use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
15
use Wa72\HtmlPageDom\HtmlPageCrawler;
16

17
/**
 * Sanitizes request values: XSS cleaning, entity handling, tag tidying and insert tag encoding.
 *
 * @internal
 */
class RequestCleaner
{
    /**
     * XSS clean, tidy (or decode entities), encode special characters and encode insert tags,
     * then return the safe, cleaned value(s). Arrays are cleaned recursively, key by key.
     *
     * Note: when $tidy is true, tidy() is called with its defaults (no allowed tags, entities
     * stay encoded), so $decodeEntities only has an effect when $tidy is false.
     *
     * @param mixed $value            The input value
     * @param bool  $decodeEntities   If true, special characters are not re-encoded (only relevant when $tidy is false)
     * @param bool  $encodeInsertTags If true, encode the opening and closing delimiters of insert tags
     * @param bool  $tidy             If true, the value is tidied up
     * @param bool  $strictMode       If true, the xss cleaner also removes JavaScript event handlers
     *
     * @return mixed The cleaned value (null stays null, arrays stay arrays)
     */
    public function clean($value, bool $decodeEntities = false, bool $encodeInsertTags = true, bool $tidy = true, bool $strictMode = true)
    {
        // Keep null as null; running the cleaners would turn it into an empty string.
        if (null === $value) {
            return $value;
        }

        if (\is_array($value)) {
            foreach ($value as $i => $childValue) {
                $value[$i] = $this->clean($childValue, $decodeEntities, $encodeInsertTags, $tidy, $strictMode);
            }

            return $value;
        }

        // Values accepted by Validator::isUuid() (e.g. binary UUIDs) are passed through untouched.
        if (Validator::isUuid($value)) {
            return $value;
        }

        $value = $this->xssClean($value, $strictMode);

        if ($tidy) {
            $value = $this->tidy($value);
        } else {
            // Entity decoding for the tidy branch happens inside tidy(): it is more complex there,
            // because disallowed tags must end up as readable text, not as HTML entities.
            $value = Input::decodeEntities($value);
        }

        return $this->encodeResult($value, $decodeEntities, $encodeInsertTags, $tidy);
    }

    /**
     * Same pipeline as clean(), but lets the caller keep a list of allowed HTML tags and
     * forwards $decodeEntities to tidy(). Arrays are cleaned recursively, key by key.
     *
     * @param mixed  $value            The input value
     * @param bool   $decodeEntities   If true, all entities will be decoded
     * @param bool   $encodeInsertTags If true, encode the opening and closing delimiters of insert tags
     * @param string $allowedTags      List of allowed html tags, e.g. `<p><span>`
     * @param bool   $tidy             If true, the value is tidied up
     * @param bool   $strictMode       If true, the xss cleaner also removes JavaScript event handlers
     *
     * @return mixed The cleaned value (null stays null, arrays stay arrays)
     */
    public function cleanHtml($value, bool $decodeEntities = false, bool $encodeInsertTags = true, string $allowedTags = '', bool $tidy = true, bool $strictMode = true)
    {
        // Keep null as null; running the cleaners would turn it into an empty string.
        if (null === $value) {
            return $value;
        }

        if (\is_array($value)) {
            foreach ($value as $i => $childValue) {
                $value[$i] = $this->cleanHtml($childValue, $decodeEntities, $encodeInsertTags, $allowedTags, $tidy, $strictMode);
            }

            return $value;
        }

        // Values accepted by Validator::isUuid() (e.g. binary UUIDs) are passed through untouched.
        if (Validator::isUuid($value)) {
            return $value;
        }

        $value = $this->xssClean($value, $strictMode);

        if ($tidy) {
            $value = $this->tidy($value, $allowedTags, $decodeEntities);
        } else {
            // Entity decoding for the tidy branch happens inside tidy(): it is more complex there,
            // because disallowed tags must end up as readable text, not as HTML entities.
            $value = Input::decodeEntities($value);
        }

        return $this->encodeResult($value, $decodeEntities, $encodeInsertTags, $tidy);
    }

    /**
     * Clean a value and try to prevent XSS attacks.
     *
     * @param mixed $varValue   A string or array
     * @param bool  $strictMode If true, the function also removes JavaScript event handlers
     *
     * @return mixed The cleaned string or array
     */
    public function xssClean($varValue, bool $strictMode = false)
    {
        if (\is_array($varValue)) {
            foreach ($varValue as $key => $value) {
                $varValue[$key] = $this->xssClean($value, $strictMode);
            }

            return $varValue;
        }

        // Binary UUIDs are not XSS cleaned.
        if (Validator::isBinaryUuid($varValue)) {
            return $varValue;
        }

        // Workaround: StringUtil::decodeEntities() returns an empty string for the value 0
        // in some Contao 4.9 versions, so zero is never passed to it.
        if ('0' !== $varValue && 0 !== $varValue) {
            $varValue = StringUtil::decodeEntities($varValue);
        }

        // Make sure every numeric-style entity (&#...) is terminated by exactly one semicolon.
        $varValue = preg_replace('/(&#[A-Za-z0-9]+);?/i', '$1;', $varValue);

        // Strip a leading quote-and-bracket prefix in front of a tag, e.g.
        // "><script>alert('xss')</script> or '></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>
        // NOTE(review): `|` in ["|\'] and the second `^` in [^\/^>] sit inside character classes
        // and are therefore matched literally - confirm this is intended before changing it.
        $varValue = preg_replace('/(?<!\w)(?>["|\']>)+(<[^\/^>]+>.*)/', '$1', $varValue);

        $varValue = Input::xssClean($varValue, $strictMode);

        return $varValue;
    }

    /**
     * Tidy a value: tags that are not allowed are turned into readable, entity-encoded text.
     *
     * @param string $varValue       Input value
     * @param string $allowedTags    Allowed tags as string `<p><span>`
     * @param bool   $decodeEntities If true, all entities will be decoded
     *
     * @return string The tidied string
     */
    public function tidy($varValue, string $allowedTags = '', bool $decodeEntities = false): string
    {
        if (!$varValue) {
            // The declared return type is string: normalise falsy input explicitly instead of
            // returning it as-is, which raised a TypeError for null or an empty array.
            // Scalars yield the same result the implicit return coercion produced before.
            return \is_scalar($varValue) ? (string) $varValue : '';
        }

        // Performance shortcut: values without any XSS critical character are not tidied.
        if (!preg_match('#"|\'|<|>|\(|\)#', StringUtil::decodeEntities($varValue))) {
            return $varValue;
        }

        // Remove illegal white space after the closing tag slash, e.g. <br / >
        $varValue = preg_replace('@\/(\s+)>@', '/>', $varValue);

        // Encode the opening angle bracket of everything that looks like a tag, comment or
        // processing instruction, so that it survives the next step.
        $varValue = preg_replace_callback('/<(?(?=!--)!--[\s\S]*--|(?(?=\?)\?[\s\S]*\?|(?(?=\/)\/[^.\-\d][^\/\]\'"[!#$%&()*+,;<=>?@^`{|}~ ]*|[^.\-\d][^\/\]\'"[!#$%&()*+,;<=>?@^`{|}~ ]*(?:\s[^.\-\d][^\/\]\'"[!#$%&()*+,;<=>?@^`{|}~ ]*(?:=(?:"[^"]*"|\'[^\']*\'|[^\'"<\s]*))?)*)\s?\/?))>/', static function ($matches) {
            return substr_replace($matches[0], '&lt;', 0, 1);
        }, $varValue);

        // Every remaining less-than sign is not part of a tag: park it as [lt].
        $varValue = str_replace('<', '[lt]', $varValue);

        // Now that plain less-than signs are parked as [lt], revert &lt; back to <.
        $varValue = StringUtil::decodeEntities($varValue);

        // Restore HTML comments
        $varValue = str_replace(['&lt;!--', '&lt;!['], ['<!--', '<!['], $varValue);

        // Recheck for encoded null bytes (loop, because removing one can form a new one).
        while (false !== strpos($varValue, '\\0')) {
            $varValue = str_replace('\\0', '', $varValue);
        }

        // Id of the temporary wrapper element used for HTML fragments.
        $wrapperId = 'tidyWrapperx123x123xawec3';

        $objCrawler = new HtmlPageCrawler($varValue);

        // Fragments are wrapped in a div so the crawler has a single root to work on.
        if (!$objCrawler->isHtmlDocument()) {
            $objCrawler = new HtmlPageCrawler('<div id="'.$wrapperId.'">'.$varValue.'</div>');
        }

        // `<p><span>` becomes ['p', 'span']; empty entries are dropped.
        $arrAllowedTags = explode('<', str_replace('>', '', $allowedTags));
        $arrAllowedTags = array_filter($arrAllowedTags);

        try {
            if (!empty($arrAllowedTags)) {
                $objCrawler->filter('*')->each(function ($node) use ($arrAllowedTags, $wrapperId) {
                    /** @var HtmlPageCrawler $node */

                    // skip wrapper
                    if ($wrapperId === $node->getAttribute('id')) {
                        return $node;
                    }

                    if (!\in_array($node->getNode(0)->tagName, $arrAllowedTags, true)) {
                        // Park the brackets of the disallowed element as placeholders; they are
                        // turned into numeric entities at the very end of tidy().
                        $strHTML = $node->saveHTML();
                        $strHTML = str_replace(['<', '>'], ['[[xlt]]', '[[xgt]]'], $strHTML);

                        // Replace the unwanted element by its text representation.
                        return $node->replaceWith($strHTML);
                    }

                    return $node;
                });
            }

            // Unwrap the temporary wrapper div and use its inner HTML, or serialize the full document.
            if (!$objCrawler->isHtmlDocument()) {
                $varValue = $objCrawler->filter('div#'.$wrapperId)->getInnerHtml();
            } else {
                $varValue = $objCrawler->saveHTML();
            }

            // For HTML documents or fragments the crawler first converts all non-ASCII characters
            // to entities (see: https://github.com/wasinger/htmlpagedom/issues/5), so decode them again.
            $varValue = StringUtil::decodeEntities($varValue);

            // Remove every run of [nbsp] placeholders from the value.
            $varValue = preg_replace('@(\[nbsp\])+@', '', $varValue);
        } catch (SyntaxErrorException $e) {
            // Deliberate best effort: on a selector syntax error the value is kept as prepared
            // above and still runs through the encoding steps below.
        }

        $varValue = $this->restoreBasicEntities($varValue, $decodeEntities);

        if (!$decodeEntities) {
            $varValue = Input::encodeSpecialChars($varValue);
        }

        // Encode the opening and closing brackets of unwanted tags.
        $arrSearch = ['[[xlt]]', '[[xgt]]'];
        $arrReplace = ['&#60;', '&#62;'];
        $varValue = str_replace($arrSearch, $arrReplace, $varValue);

        return $varValue;
    }

    /**
     * Restore basic entities: replaces the placeholders [&], [&amp;], [lt], [gt], [nbsp] and [-]
     * by &amp;, &amp;, &lt;, &gt;, &nbsp; and &shy;.
     *
     * @param string $buffer         The string with the placeholders to be replaced
     * @param bool   $decodeEntities If true, all entities will be decoded afterwards
     *
     * @return string The string with the original entities
     */
    public function restoreBasicEntities(string $buffer, bool $decodeEntities = false): string
    {
        $buffer = str_replace(['[&]', '[&amp;]', '[lt]', '[gt]', '[nbsp]', '[-]'], ['&amp;', '&amp;', '&lt;', '&gt;', '&nbsp;', '&shy;'], $buffer);

        if ($decodeEntities) {
            $buffer = StringUtil::decodeEntities($buffer);
        }

        return $buffer;
    }

    /**
     * Apply the final encoding steps shared by clean() and cleanHtml().
     *
     * @param mixed $value            The already xss-cleaned and tidied/decoded value
     * @param bool  $decodeEntities   If true, special characters are not re-encoded
     * @param bool  $encodeInsertTags If true, encode the opening and closing delimiters of insert tags
     * @param bool  $tidy             Whether tidy() did run on the value
     *
     * @return mixed The encoded value
     */
    private function encodeResult($value, bool $decodeEntities, bool $encodeInsertTags, bool $tidy)
    {
        // Skip encodeSpecialChars when tidy() did run, otherwise disallowed tags would be encoded twice.
        if (!$decodeEntities && !$tidy) {
            $value = Input::encodeSpecialChars($value);
        }

        if ($encodeInsertTags) {
            $value = Input::encodeInsertTags($value);
        }

        return $value;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc