• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

dg / texy / 22278787079

22 Feb 2026 02:13PM UTC coverage: 93.049% (-0.008%) from 93.057%
22278787079

push

github

dg
added CLAUDE.md

2423 of 2604 relevant lines covered (93.05%)

0.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.7
/src/Texy/Modules/HtmlModule.php
1
<?php declare(strict_types=1);
2

3
/**
4
 * This file is part of the Texy! (https://texy.nette.org)
5
 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
6
 */
7

8
namespace Texy\Modules;
9

10
use Texy;
11
use Texy\HtmlElement;
12
use Texy\Patterns;
13
use function array_flip, explode, is_array, is_string, preg_match, preg_match_all, str_contains, str_ends_with, strtolower, strtoupper, strtr, substr, trim;
14
use const PREG_SET_ORDER;
15

16

17
/**
18
 * Processes HTML tags and comments in input text.
19
 */
20
final class HtmlModule extends Texy\Module
21
{
22
        /** pass HTML comments to output? */
23
        public bool $passComment = true;
24

25

26
        /**
         * Wires the module into the given Texy instance: installs the default
         * 'htmlComment' and 'htmlTag' handlers and registers the line patterns
         * 'html/tag' and 'html/comment'.
         */
        public function __construct(Texy\Texy $texy)
        {
                $this->texy = $texy;

                // default handlers; they are reached through invokeAroundHandlers() in the pattern callbacks
                $texy->addHandler('htmlComment', $this->solveComment(...));
                $texy->addHandler('htmlTag', $this->solveTag(...));

                $mark = Patterns::MARK;

                // capture groups: 1 = leading slash, 2 = tag name, 3 = attributes, 4 = trailing slash
                $tagPattern = '#<(/?)([a-z][a-z0-9_:-]{0,50})((?:\s++[a-z0-9\_:-]++|=\s*+"[^"' . $mark . ']*+"|=\s*+\'[^\'' . $mark . ']*+\'|=[^\s>' . $mark . ']++)*)\s*+(/?)>#isu';
                $texy->registerLinePattern($this->patternTag(...), $tagPattern, 'html/tag');

                // capture group 1 = comment text between the delimiters
                $commentPattern = '#<!--([^' . $mark . ']*?)-->#is';
                $texy->registerLinePattern($this->patternComment(...), $commentPattern, 'html/comment');
        }
1✔
45

46

47
        /**
         * Callback for: <!-- comment -->.
         *
         * Hands the captured comment text over to the 'htmlComment' handler chain
         * and returns whatever that chain produces.
         * @param  string[]  $matches  [1] holds the text between <!-- and -->
         */
        public function patternComment(Texy\LineParser $parser, array $matches): HtmlElement|string|null
        {
                $comment = $matches[1];

                return $this->texy->invokeAroundHandlers('htmlComment', $parser, [$comment]);
        }
56

57

58
        /**
         * Callback for: <tag attr="...">.
         *
         * Builds an HtmlElement from the match and passes it to the 'htmlTag'
         * handler chain. Returns null for malformed tags (a closing tag that is
         * also self-closing, or a closing tag carrying attributes); otherwise
         * returns the protected start/end tag when the handlers yield an
         * HtmlElement, or the handlers' result as is.
         * @param  string[]  $matches
         */
        public function patternTag(Texy\LineParser $parser, array $matches): ?string
        {
                [, $mEnd, $mTag, $mAttr, $mEmpty] = $matches;
                // [1] => /
                // [2] => tag
                // [3] => attributes
                // [4] => /

                $isStart = $mEnd !== '/';
                $isEmpty = $mEmpty === '/';
                if (!$isEmpty && str_ends_with($mAttr, '/')) { // the self-closing slash got captured as part of $mAttr
                        $mAttr = substr($mAttr, 0, -1);
                        $isEmpty = true;
                }

                // error - can't close empty element
                if ($isEmpty && !$isStart) {
                        return null;
                }

                // error - end element with attributes
                $mAttr = trim(strtr($mAttr, "\n", ' '));
                // compare against '' explicitly: the attribute text "0" is falsy in PHP
                // and would otherwise slip through as "no attributes"
                if ($mAttr !== '' && !$isStart) {
                        return null;
                }

                $el = new HtmlElement($mTag);
                if ($isStart) {
                        $el->attrs = $this->parseAttributes($mAttr);
                }

                $res = $this->texy->invokeAroundHandlers('htmlTag', $parser, [$el, $isStart, $isEmpty]);

                if ($res instanceof HtmlElement) {
                        return $this->texy->protect($isStart ? $res->startTag() : $res->endTag(), $res->getContentType());
                }

                return $res;
        }
101

102

103
        /**
         * Default 'htmlTag' handler: decides whether the element may pass.
         *
         * Returns null when tags are disabled, the tag is not in the allowed
         * list, or attribute validation fails; otherwise returns the (possibly
         * renamed and attribute-filtered) element.
         */
        private function solveTag(
                Texy\HandlerInvocation $invocation,
                HtmlElement $el,
                bool $isStart,
                ?bool $forceEmpty = null,
        ): ?HtmlElement
        {
                $texy = $this->texy;
                $allowedTags = $texy->allowedTags;

                // all tags are disabled
                if (!$allowedTags) {
                        return null;
                }

                // normalize case: names known to the DTD and fully UPPERCASE names become lowercase
                $name = $el->getName();
                assert($name !== null);
                $lower = strtolower($name);
                if (isset($texy->getDTD()[$lower]) || $name === strtoupper($name)) {
                        $name = $lower;
                        $el->setName($name);
                }

                $isWhitelist = is_array($allowedTags);
                if ($isWhitelist && !isset($allowedTags[$name])) {
                        return null;
                }

                // not an array, i.e. allowedTags === Texy\Texy::ALL
                if (!$isWhitelist && $forceEmpty) {
                        $el->setName($name, empty: true);
                }

                // an end tag needs no attribute processing
                if (!$isStart) {
                        return $el;
                }

                $this->applyAttrs($el->attrs, $isWhitelist ? $allowedTags[$name] : $texy::ALL);
                $this->applyClasses($el->attrs, $texy->getAllowedProps()[0]);
                $this->applyStyles($el->attrs, $texy->getAllowedProps()[1]);

                if ($this->validateAttrs($el, $texy)) {
                        $el->validateAttrs($texy->getDTD());
                        return $el;
                }

                return null;
        }
157

158

159
        /**
         * Default 'htmlComment' handler.
         *
         * Returns an empty string when $passComment is off; otherwise returns the
         * sanitized comment wrapped in <!-- --> and protected as markup.
         */
        private function solveComment(Texy\HandlerInvocation $invocation, string $content): string
        {
                if ($this->passComment) {
                        // replace every run of two or more dashes by ' - ', then strip dashes from both ends
                        $sanitized = trim(Texy\Regexp::replace($content, '#-{2,}#', ' - '), '-');

                        return $this->texy->protect('<!--' . $sanitized . '-->', Texy\Texy::CONTENT_MARKUP);
                }

                return '';
        }
174

175

176
        /**
         * Filters $attrs in place by the list of allowed attribute names.
         *
         * A falsy $allowedAttrs (false or an empty list) removes every attribute,
         * a list keeps only the named attributes, any other value keeps all.
         * @param  array<string, array<string|int|bool>|string|int|bool|null>  $attrs
         * @param  bool|string[]  $allowedAttrs
         */
        private function applyAttrs(array &$attrs, bool|array $allowedAttrs): void
        {
                if (!$allowedAttrs) {
                        $attrs = [];
                        return;
                }

                if (!is_array($allowedAttrs)) {
                        return; // nothing to filter
                }

                // flip the list so membership is a key lookup
                $whitelist = array_flip($allowedAttrs);
                foreach ($attrs as $attrName => $unused) {
                        if (!isset($whitelist[$attrName])) {
                                unset($attrs[$attrName]);
                        }
                }
        }
1✔
195

196

197
        /**
         * Filters the 'class' and 'id' attributes in place.
         *
         * With an array, classes are looked up by name and the id by '#' . id;
         * class is turned into a list with disallowed entries removed and a
         * disallowed id is set to null. With Texy\Texy::ALL nothing changes; with
         * any other non-array value both attributes are set to null.
         * @param  array<string, string|int|bool|array<string|int|bool>|null>  $attrs
         * @param  array<string, int>|bool  $allowedClasses
         */
        private function applyClasses(array &$attrs, bool|array $allowedClasses): void
        {
                $isWhitelist = is_array($allowedClasses);
                $denyAll = !$isWhitelist && $allowedClasses !== Texy\Texy::ALL;

                if (isset($attrs['class'])) {
                        if ($isWhitelist) {
                                $classes = is_string($attrs['class'])
                                        ? explode(' ', $attrs['class'])
                                        : (array) $attrs['class'];
                                foreach ($classes as $index => $class) {
                                        if (!isset($allowedClasses[$class])) {
                                                unset($classes[$index]); // id & class are case-sensitive
                                        }
                                }
                                $attrs['class'] = $classes;

                        } elseif ($denyAll) {
                                $attrs['class'] = null;
                        }
                }

                if (isset($attrs['id'])) {
                        if ($isWhitelist) {
                                $idAllowed = is_string($attrs['id']) && isset($allowedClasses['#' . $attrs['id']]);
                                if (!$idAllowed) {
                                        $attrs['id'] = null;
                                }

                        } elseif ($denyAll) {
                                $attrs['id'] = null;
                        }
                }
        }
1✔
225

226

227
        /**
         * Filters the 'style' attribute in place.
         *
         * With an array, a string style is split on ';' and ':' into a
         * property => value map (a non-string style is cast to array) and every
         * property whose lowercased name is not a key of $allowedStyles is
         * removed. With Texy\Texy::ALL nothing changes; with any other non-array
         * value the attribute is set to null.
         * @param  array<string, string|int|bool|array<string|int|bool>|null>  $attrs
         * @param  array<string, int>|bool  $allowedStyles
         */
        private function applyStyles(array &$attrs, bool|array $allowedStyles): void
        {
                if (!isset($attrs['style'])) {
                        return;
                }

                if (!is_array($allowedStyles)) {
                        if ($allowedStyles !== Texy\Texy::ALL) {
                                $attrs['style'] = null;
                        }

                        return;
                }

                $style = $attrs['style'];
                if (is_string($style)) {
                        $declarations = [];
                        foreach (explode(';', $style) as $declaration) {
                                $pair = explode(':', $declaration, 2);
                                if (count($pair) === 2) { // skip fragments without a colon
                                        $declarations[trim($pair[0])] = trim($pair[1]);
                                }
                        }
                } else {
                        $declarations = (array) $style;
                }

                foreach ($declarations as $property => $unused) {
                        if (!isset($allowedStyles[strtolower((string) $property)])) { // CSS is case-insensitive
                                unset($declarations[$property]);
                        }
                }

                $attrs['style'] = $declarations;
        }
1✔
256

257

258
        /**
         * Normalizes URL/identifier attributes and applies per-element checks.
         *
         * - src, href, name and id are trimmed; non-string or empty values are removed.
         * - <img> needs a src that passes checkURL(); the src is added to summary['images'].
         * - <a> needs at least one of href, name or id; an href must pass checkURL()
         *   and is added to summary['links']. When linkModule->forceNoFollow is on
         *   and the href contains '//', 'nofollow' is appended to rel.
         * - <h1>..<h6> are appended to headingModule->TOC.
         *
         * @return bool  false when the element must be rejected
         */
        private function validateAttrs(HtmlElement $el, Texy\Texy $texy): bool
        {
                foreach (['src', 'href', 'name', 'id'] as $attr) {
                        if (isset($el->attrs[$attr])) {
                                $el->attrs[$attr] = is_string($el->attrs[$attr])
                                        ? trim($el->attrs[$attr])
                                        : '';
                                if ($el->attrs[$attr] === '') {
                                        unset($el->attrs[$attr]);
                                }
                        }
                }

                $name = $el->getName();
                if ($name === 'img') {
                        if (!isset($el->attrs['src'])) {
                                return false;
                        }

                        assert(is_string($el->attrs['src']));
                        if (!$texy->checkURL($el->attrs['src'], $texy::FILTER_IMAGE)) {
                                return false;
                        }

                        $texy->summary['images'][] = $el->attrs['src'];

                } elseif ($name === 'a') {
                        if (!isset($el->attrs['href']) && !isset($el->attrs['name']) && !isset($el->attrs['id'])) {
                                return false;
                        }

                        if (isset($el->attrs['href'])) {
                                assert(is_string($el->attrs['href']));
                                if ($texy->linkModule->forceNoFollow && str_contains($el->attrs['href'], '//')) {
                                        $el->attrs['rel'] = (array) ($el->attrs['rel'] ?? []);
                                        $el->attrs['rel'][] = 'nofollow';
                                }

                                if (!$texy->checkURL($el->attrs['href'], $texy::FILTER_ANCHOR)) {
                                        return false;
                                }

                                $texy->summary['links'][] = $el->attrs['href'];
                        }

                // anchored at both ends so that only h1..h6 match, not names such as "h1x" or "h10"
                } elseif (preg_match('#^h([1-6])\z#i', $name ?? '', $heading)) {
                        $texy->headingModule->TOC[] = [
                                'el' => $el,
                                'level' => (int) $heading[1],
                                'type' => 'html',
                        ];
                }

                return true;
        }
313

314

315
        /**
         * Parses a raw attribute string into a name => value map.
         *
         * Names are lowercased. An attribute without a value yields true; other
         * values have their surrounding quotes removed (if any) and are
         * HTML-unescaped. A repeated name keeps its last occurrence.
         * @return array<string, string|bool>
         */
        private function parseAttributes(string $attrs): array
        {
                $found = [];
                preg_match_all(
                        '#([a-z0-9\_:-]+)\s*(?:=\s*(\'[^\']*\'|"[^"]*"|[^\'"\s]+))?()#isu',
                        $attrs,
                        $found,
                        PREG_SET_ORDER,
                );

                $res = [];
                // the trailing empty group in the pattern guarantees that index 2 exists
                // (as '') even when the attribute has no value
                foreach ($found as [, $rawName, $rawValue]) {
                        $key = strtolower($rawName);
                        if ($rawValue === '') {
                                $res[$key] = true;
                                continue;
                        }

                        $isQuoted = $rawValue[0] === '\'' || $rawValue[0] === '"';
                        $res[$key] = Texy\Helpers::unescapeHtml($isQuoted ? substr($rawValue, 1, -1) : $rawValue);
                }

                return $res;
        }
340
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc