• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

dg / texy / 12879605443

21 Jan 2025 03:31AM UTC coverage: 92.224% (+0.03%) from 92.197%
12879605443

push

github

dg
regexp: uses unmatched as null (BC break)

14 of 14 new or added lines in 6 files covered. (100.0%)

101 existing lines in 14 files now uncovered.

2372 of 2572 relevant lines covered (92.22%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.56
/src/Texy/Modules/HtmlModule.php
1
<?php
2

3
/**
4
 * This file is part of the Texy! (https://texy.info)
5
 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
6
 */
7

8
declare(strict_types=1);
9

10
namespace Texy\Modules;
11

12
use Texy;
13
use Texy\HtmlElement;
14
use Texy\Patterns;
15
use Texy\Regexp;
16

17

18
/**
19
 * Html tags module.
20
 */
21
final class HtmlModule extends Texy\Module
22
{
23
        /** pass HTML comments to output? */
24
        public bool $passComment = true;
25

26

27
        /**
         * Wires the module into the given Texy instance.
         *
         * Binds the 'htmlComment' and 'htmlTag' handlers to solveComment() and
         * solveTag(), and registers the 'html/tag' and 'html/comment' line
         * patterns, routed to patternTag() and patternComment().
         */
        public function __construct(Texy\Texy $texy)
        {
                $this->texy = $texy;

                // an opening, closing or self-closing tag together with its raw attribute string
                $tagPattern = '~
                                < (/?)                          # tag begin
                                ([a-z][a-z0-9_:-]{0,50})        # tag name
                                (
                                        (?:
                                                \s++ [a-z0-9\_:-]++ |   # attribute name
                                                = \s*+ " [^"' . Patterns::MARK . ']*+ " |     # attribute value in double quotes
                                                = \s*+ \' [^\'' . Patterns::MARK . ']*+ \' |  # attribute value in single quotes
                                                = [^\s>' . Patterns::MARK . ']++              # attribute value without quotes
                                        )*
                                )
                                \s*+
                                (/?)                             # self-closing slash
                                >
                        ~is';

                // an HTML comment whose body excludes the characters in Patterns::MARK
                $commentPattern = '~
                                <!--
                                ( [^' . Patterns::MARK . ']*? )
                                -->
                        ~is';

                $texy->addHandler('htmlComment', $this->solveComment(...));
                $texy->addHandler('htmlTag', $this->solveTag(...));

                $texy->registerLinePattern($this->patternTag(...), $tagPattern, 'html/tag');
                $texy->registerLinePattern($this->patternComment(...), $commentPattern, 'html/comment');
        }
1✔
64

65

66
        /**
         * Line-pattern callback for an HTML comment: <!-- comment -->.
         *
         * Hands the captured comment body to the 'htmlComment' handler chain
         * and returns whatever that chain produces.
         */
        public function patternComment(Texy\LineParser $parser, array $matches): HtmlElement|string|null
        {
                // $matches[1] is the text between "<!--" and "-->"
                $comment = $matches[1];

                return $this->texy->invokeAroundHandlers('htmlComment', $parser, [$comment]);
        }
74

75

76
        /**
         * Line-pattern callback for an HTML tag: <tag attr="...">, </tag> or <tag />.
         *
         * $matches holds:
         *   [1] => "/" for an end tag, otherwise ""
         *   [2] => tag name
         *   [3] => raw attribute string
         *   [4] => "/" for a self-closing tag, otherwise ""
         *
         * Returns null for a malformed tag (an end tag that is self-closing or
         * that carries attributes). Otherwise returns the result of the
         * 'htmlTag' handler chain; an HtmlElement result is rendered to its
         * start or end tag and passed through Texy::protect().
         */
        public function patternTag(Texy\LineParser $parser, array $matches): HtmlElement|string|null
        {
                [, $mEnd, $mTag, $mAttr, $mEmpty] = $matches;

                $isStart = $mEnd !== '/';
                $isEmpty = $mEmpty === '/';
                if (!$isEmpty && str_ends_with($mAttr, '/')) {
                        // the self-closing slash got stuck in $mAttr (e.g. swallowed by an unquoted attribute value)
                        $mAttr = substr($mAttr, 0, -1);
                        $isEmpty = true;
                }

                // error - an end tag cannot be self-closing
                if ($isEmpty && !$isStart) {
                        return null;
                }

                // error - an end tag cannot carry attributes
                $mAttr = trim(strtr($mAttr, "\n", ' '));
                // compare against '' explicitly: a plain truthiness test would treat the attribute string "0" as empty
                if ($mAttr !== '' && !$isStart) {
                        return null;
                }

                $el = new HtmlElement($mTag);
                if ($isStart) {
                        $el->attrs = $this->parseAttributes($mAttr);
                }

                $res = $this->texy->invokeAroundHandlers('htmlTag', $parser, [$el, $isStart, $isEmpty]);

                if ($res instanceof HtmlElement) {
                        return $this->texy->protect($isStart ? $res->startTag() : $res->endTag(), $res->getContentType());
                }

                return $res;
        }
118

119

120
        /**
         * Default 'htmlTag' handler: decides whether a tag may reach the output.
         *
         * Returns null when the tag is rejected (all tags disabled, tag name
         * not allowed, or validateAttrs() fails). Otherwise returns the
         * element; for start tags its attributes are first filtered by
         * applyAttrs(), applyClasses() and applyStyles() and then validated.
         */
        private function solveTag(
                Texy\HandlerInvocation $invocation,
                HtmlElement $el,
                bool $isStart,
                ?bool $forceEmpty = null,
        ): HtmlElement|string|null
        {
                $texy = $this->texy;

                $allowed = $texy->allowedTags; // local copy, read several times below
                if (!$allowed) {
                        return null; // every tag is disabled
                }

                // normalize the name: names known to the DTD and ALL-UPPERCASE names become lowercase
                $name = $el->getName();
                $lowered = strtolower($name);
                if (isset($texy->getDTD()[$lowered]) || strtoupper($name) === $name) {
                        $name = $lowered;
                        $el->setName($name);
                }

                $isWhitelist = is_array($allowed);
                if ($isWhitelist && !isset($allowed[$name])) {
                        return null; // tag is not on the whitelist
                }

                if (!$isWhitelist && $forceEmpty) {
                        // allowedTags === Texy\Texy::ALL: forward the forced-empty flag to setName()
                        $el->setName($name, true);
                }

                // an end tag needs no attribute processing
                if (!$isStart) {
                        return $el;
                }

                $this->applyAttrs($el->attrs, $isWhitelist ? $allowed[$name] : $texy::ALL);
                $this->applyClasses($el->attrs, $texy->getAllowedProps()[0]);
                $this->applyStyles($el->attrs, $texy->getAllowedProps()[1]);

                if ($this->validateAttrs($el, $texy)) {
                        $el->validateAttrs($texy->getDTD());

                        return $el;
                }

                return null;
        }
173

174

175
        /**
         * Default 'htmlComment' handler.
         *
         * Returns an empty string when $passComment is off; otherwise returns
         * the sanitized comment wrapped in <!-- --> and passed through
         * Texy::protect() as markup content.
         */
        private function solveComment(Texy\HandlerInvocation $invocation, string $content): string
        {
                if ($this->passComment) {
                        // runs of two or more dashes become " - ", then dashes at either end are stripped
                        $sanitized = trim(Regexp::replace($content, '~-{2,}~', ' - '), '-');

                        return $this->texy->protect('<!--' . $sanitized . '-->', Texy\Texy::CONTENT_MARKUP);
                }

                return '';
        }
190

191

192
        /**
         * Filters $attrs in place according to the allowed attribute setting.
         *
         * A falsy $allowedAttrs removes every attribute; an array keeps only
         * the attributes whose names it lists; any other truthy value leaves
         * $attrs untouched.
         */
        private function applyAttrs(&$attrs, $allowedAttrs): void
        {
                if (!$allowedAttrs) {
                        $attrs = [];

                        return;
                }

                if (is_array($allowedAttrs)) {
                        // keep only whitelisted names; order and values of the survivors are preserved
                        $attrs = array_intersect_key($attrs, array_flip($allowedAttrs));
                }
        }
1✔
207

208

209
        /**
         * Filters the 'class' and 'id' attributes in place.
         *
         * When $allowedClasses is an array, it is used as a set keyed by class
         * name, with ids keyed as '#id': 'class' becomes the array of the
         * space-separated classes that are allowed, and a disallowed 'id' is
         * set to null. When it is Texy\Texy::ALL both attributes are left
         * untouched; for any other value both are set to null.
         *
         * A non-string value (e.g. `true`, which parseAttributes() stores for a
         * valueless attribute) can never be whitelisted and is set to null.
         */
        private function applyClasses(&$attrs, $allowedClasses): void
        {
                if (!isset($attrs['class'])) {
                        // no class attribute, nothing to filter
                } elseif (is_array($allowedClasses)) {
                        if (is_string($attrs['class'])) {
                                $attrs['class'] = explode(' ', $attrs['class']);
                                foreach ($attrs['class'] as $key => $value) {
                                        if (!isset($allowedClasses[$value])) {
                                                unset($attrs['class'][$key]); // id & class are case-sensitive
                                        }
                                }
                        } else {
                                // explode() would throw a TypeError on a non-string under strict_types
                                $attrs['class'] = null;
                        }
                } elseif ($allowedClasses !== Texy\Texy::ALL) {
                        $attrs['class'] = null;
                }

                if (!isset($attrs['id'])) {
                        // no id attribute, nothing to filter
                } elseif (is_array($allowedClasses)) {
                        // a non-string id must not be concatenated: `true` would be looked up as '#1'
                        if (!is_string($attrs['id']) || !isset($allowedClasses['#' . $attrs['id']])) {
                                $attrs['id'] = null;
                        }
                } elseif ($allowedClasses !== Texy\Texy::ALL) {
                        $attrs['id'] = null;
                }
        }
1✔
232

233

234
        /**
         * Filters the 'style' attribute in place.
         *
         * When $allowedStyles is an array, it is used as a set keyed by
         * lowercase property name: the style string is split on ';' into
         * "property: value" pairs and 'style' becomes an array of the allowed
         * pairs (or null when none survive). When it is Texy\Texy::ALL the
         * attribute is left untouched; for any other value it is set to null.
         *
         * A non-string value (e.g. `true`, which parseAttributes() stores for a
         * valueless attribute) carries no declarations and is set to null.
         */
        private function applyStyles(&$attrs, $allowedStyles): void
        {
                if (!isset($attrs['style'])) {
                        // no style attribute, nothing to filter
                } elseif (is_array($allowedStyles)) {
                        // explode() would throw a TypeError on a non-string under strict_types
                        $tmp = is_string($attrs['style']) ? explode(';', $attrs['style']) : [];
                        $attrs['style'] = null;
                        foreach ($tmp as $value) {
                                $pair = explode(':', $value, 2);
                                $prop = trim($pair[0]);
                                if (isset($pair[1], $allowedStyles[strtolower($prop)])) { // CSS is case-insensitive
                                        $attrs['style'][$prop] = $pair[1];
                                }
                        }
                } elseif ($allowedStyles !== Texy\Texy::ALL) {
                        $attrs['style'] = null;
                }
        }
1✔
251

252

253
        /**
         * Normalizes and validates the attributes of a start tag.
         *
         * The 'src', 'href', 'name' and 'id' attributes are trimmed; non-string
         * or empty values are removed. After that:
         *  - <img> requires a 'src' that passes checkURL(); the source is
         *    recorded in $texy->summary['images'],
         *  - <a> requires at least one of 'href', 'name' or 'id'; an 'href'
         *    must pass checkURL() and is recorded in $texy->summary['links'],
         *    and gets rel="nofollow" appended when forceNoFollow is on and the
         *    URL contains '//',
         *  - <h1> to <h6> are appended to the heading module's TOC.
         *
         * Returns false when the element must be rejected, true otherwise.
         */
        private function validateAttrs(HtmlElement $el, Texy\Texy $texy): bool
        {
                foreach (['src', 'href', 'name', 'id'] as $attr) {
                        if (isset($el->attrs[$attr])) {
                                $el->attrs[$attr] = is_string($el->attrs[$attr])
                                        ? trim($el->attrs[$attr])
                                        : '';
                                if ($el->attrs[$attr] === '') {
                                        unset($el->attrs[$attr]);
                                }
                        }
                }

                $name = $el->getName();
                if ($name === 'img') {
                        if (!isset($el->attrs['src']) || !$texy->checkURL($el->attrs['src'], $texy::FILTER_IMAGE)) {
                                return false;
                        }

                        $texy->summary['images'][] = $el->attrs['src'];

                } elseif ($name === 'a') {
                        if (!isset($el->attrs['href']) && !isset($el->attrs['name']) && !isset($el->attrs['id'])) {
                                return false;
                        }

                        if (isset($el->attrs['href'])) {
                                if ($texy->linkModule->forceNoFollow && str_contains($el->attrs['href'], '//')) {
                                        if (isset($el->attrs['rel'])) {
                                                // make sure an existing rel value is an array before appending to it
                                                $el->attrs['rel'] = (array) $el->attrs['rel'];
                                        }

                                        $el->attrs['rel'][] = 'nofollow';
                                }

                                if (!$texy->checkURL($el->attrs['href'], $texy::FILTER_ANCHOR)) {
                                        return false;
                                }

                                $texy->summary['links'][] = $el->attrs['href'];
                        }
                } elseif (Regexp::match($name, '~^h[1-6]$~i')) {
                        // anchored at both ends so that only h1-h6 count, not names that merely start that way (e.g. "h1-title")
                        $texy->headingModule->TOC[] = [
                                'el' => $el,
                                'level' => (int) substr($name, 1),
                                'type' => 'html',
                        ];
                }

                return true;
        }
304

305

306
        /**
         * Parses a raw attribute string into a name => value map.
         *
         * Names are lowercased. An attribute without a value maps to true;
         * quoted values lose their surrounding quotes; every value is passed
         * through Texy\Helpers::unescapeHtml(). When a name repeats, the last
         * occurrence wins.
         */
        private function parseAttributes(string $attrs): array
        {
                $pattern = <<<'X'
                                ~
                                ([a-z0-9\_:-]+)                # attribute name
                                \s*
                                (?:
                                        = \s*                      # equals sign
                                        (
                                                ' [^']* ' |            # single quoted value
                                                " [^"]* " |            # double quoted value
                                                [^'"\s]+               # unquoted value
                                        )
                                )?
                                ~is
                                X;

                $parsed = [];
                foreach (Regexp::matchAll($attrs, $pattern) as $match) {
                        $name = strtolower($match[1]);
                        $raw = $match[2];

                        $parsed[$name] = match (true) {
                                // no value part was matched
                                $raw === null || $raw === '' => true,
                                // quoted value: drop the surrounding quotes
                                $raw[0] === '\'' || $raw[0] === '"' => Texy\Helpers::unescapeHtml(substr($raw, 1, -1)),
                                default => Texy\Helpers::unescapeHtml($raw),
                        };
                }

                return $parsed;
        }
341
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc