• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

dg / texy / 12879605443

21 Jan 2025 03:31AM UTC coverage: 92.224% (+0.03%) from 92.197%
12879605443

push

github

dg
regexp: uses unmatched as null (BC break)

14 of 14 new or added lines in 6 files covered. (100.0%)

101 existing lines in 14 files now uncovered.

2372 of 2572 relevant lines covered (92.22%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.08
/src/Texy/Modules/LinkModule.php
1
<?php
2

3
/**
4
 * This file is part of the Texy! (https://texy.info)
5
 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
6
 */
7

8
declare(strict_types=1);
9

10
namespace Texy\Modules;
11

12
use Texy;
13
use Texy\HandlerInvocation;
14
use Texy\LineParser;
15
use Texy\Link;
16
use Texy\Patterns;
17
use Texy\Regexp;
18

19

20
/**
21
 * Links module.
22
 */
23
final class LinkModule extends Texy\Module
24
{
25
        /** root of relative links */
26
        public ?string $root = null;
27

28
        /** linked image class */
29
        public ?string $imageClass = null;
30

31
        /** always use rel="nofollow" for absolute links? */
32
        public bool $forceNoFollow = false;
33

34
        /** shorten URLs to more readable form? */
35
        public bool $shorten = true;
36

37
        /** @var array<string, Link> link references */
38
        private array $references = [];
39

40
        /** @var array<string, bool> */
41
        private static array $livelock;
42

43
        private static string $EMAIL;
44

45

46
        /**
         * Wires the module into Texy: allows 'link/definition', registers the
         * module's handlers and the line patterns for [references], direct URLs
         * and direct e-mail addresses.
         */
        public function __construct(Texy\Texy $texy)
        {
                $this->texy = $texy;
                $texy->allowed['link/definition'] = true;

                // event name => handler, registered in this order
                $handlers = [
                        'newReference' => $this->solveNewReference(...),
                        'linkReference' => $this->solve(...),
                        'linkEmail' => $this->solveUrlEmail(...),
                        'linkURL' => $this->solveUrlEmail(...),
                        'beforeParse' => $this->beforeParse(...),
                ];
                foreach ($handlers as $event => $handler) {
                        $texy->addHandler($event, $handler);
                }

                // [reference]
                $referencePattern = '~(
                                \[
                                [^\[\]\*\n' . Patterns::MARK . ']++  # reference
                                \]
                        )~U';
                $texy->registerLinePattern(
                        $this->patternReference(...),
                        $referencePattern,
                        'link/reference',
                );

                // direct url; characters not allowed in URL <>[\]^`{|}
                $urlPattern = '~
                                (?<= ^ | [\s([<:\x17] )            # must be preceded by these chars
                                (?: https?:// | www\. | ftp:// )   # protocol or www
                                [0-9.' . Patterns::CHAR . '-]      # first char
                                [/\d' . Patterns::CHAR . '+\.\~%&?@=_:;#$!,*()\x{ad}-]{1,1000}  # URL body
                                [/\d' . Patterns::CHAR . '+\~?@=_#$*]  # last char
                        ~';
                $texy->registerLinePattern(
                        $this->patternUrlEmail(...),
                        $urlPattern,
                        'link/url',
                        '~(?: https?:// | www\. | ftp://)~',
                );

                // direct email; the fragment is kept in a static so that checkLink()
                // and textualUrl() can reuse it
                self::$EMAIL = '
                        [' . Patterns::CHAR . ']                 # first char
                        [0-9.+_' . Patterns::CHAR . '-]{0,63}    # local part
                        @
                        [0-9.+_' . Patterns::CHAR . '\x{ad}-]{1,252} # domain
                        \.
                        [' . Patterns::CHAR . '\x{ad}]{2,19}     # TLD
                ';
                $emailPattern = '~
                                (?<= ^ | [\s([<\x17] )             # must be preceded by these chars
                                ' . self::$EMAIL . '
                        ~';
                $texy->registerLinePattern(
                        $this->patternUrlEmail(...),
                        $emailPattern,
                        'link/email',
                        '~' . self::$EMAIL . '~',
                );
        }
1✔
101

102

103
        /**
         * Text pre-processing.
         *
         * Clears the circular-reference guard and, when 'link/definition' is
         * allowed, passes every reference definition line to patternReferenceDef(),
         * whose return value replaces the matched text.
         */
        private function beforeParse(Texy\Texy $texy, &$text): void
        {
                self::$livelock = [];

                if (empty($texy->allowed['link/definition'])) {
                        return;
                }

                // [la trine]: http://www.latrine.cz/ link text .(title)[class]{style}
                $definitionPattern = '~^
                                        \[
                                        ( [^\[\]#\?\*\n]{1,100} )         # reference (1)
                                        \] : \ ++
                                        ( \S{1,1000} )                    # URL (2)
                                        ( [\ \t] .{1,1000} )?             # optional description (3)
                                        ' . Patterns::MODIFIER . '?       # modifier (4)
                                        \s*
                                $~mU';

                $text = Regexp::replace($text, $definitionPattern, $this->patternReferenceDef(...));
        }
1✔
127

128

129
        /**
         * Callback for: [la trine]: http://www.latrine.cz/ link text .(title)[class]{style}.
         *
         * Builds a Link from the match, stores it as a named reference and
         * returns an empty string as the replacement for the matched text.
         *
         * @param  array  $matches  [1] reference name, [2] link, [3] optional label, [4] modifier
         */
        private function patternReferenceDef(array $matches): string
        {
                [1 => $refName, 2 => $url, 3 => $label, 4 => $modifier] = $matches;

                $link = new Link($url);
                $link->label = trim($label ?? ''); // the label group is optional
                $link->modifier->setProperties($modifier);
                $this->checkLink($link);
                $this->addReference($refName, $link);

                return '';
        }
147

148

149
        /**
         * Callback for: [ref].
         *
         * Looks the reference up; an unknown reference is passed to the
         * 'newReference' handlers, a known one to the 'linkReference' handlers
         * together with its rendered content.
         *
         * @param  array  $matches  [1] the reference including its brackets
         */
        public function patternReference(LineParser $parser, array $matches): Texy\HtmlElement|string|null
        {
                $texy = $this->texy;
                $name = substr($matches[1], 1, -1); // strip the surrounding [ ]
                $link = $this->getReference($name);

                if ($link === null) {
                        return $texy->invokeAroundHandlers('newReference', $parser, [$name]);
                }

                $link->type = $link::BRACKET;

                if ($link->label == '') { // null or ''
                        // no label: show the textual form of the URL
                        $content = $texy->protect($this->textualUrl($link), $texy::CONTENT_TEXTUAL);

                } elseif (isset(self::$livelock[$link->name])) {
                        // prevent circular references: this label is already being parsed
                        $content = $link->label;

                } else {
                        // parse the label as a line; the guard is held only while parsing
                        self::$livelock[$link->name] = true;
                        $el = new Texy\HtmlElement;
                        (new LineParser($texy, $el))->parse($link->label);
                        $content = $el->toString($texy);
                        unset(self::$livelock[$link->name]);
                }

                return $texy->invokeAroundHandlers('linkReference', $parser, [$link, $content]);
        }
186

187

188
        /**
189
         * Callback for: http://davidgrudl.com david@grudl.com.
190
         */
191
        public function patternUrlEmail(LineParser $parser, array $matches, string $name): Texy\HtmlElement|string|null
1✔
192
        {
193
                [$mURL] = $matches;
1✔
194
                // [0] => URL
195

196
                $link = new Link($mURL);
1✔
197
                $this->checkLink($link);
1✔
198

199
                return $this->texy->invokeAroundHandlers(
1✔
200
                        $name === 'link/email' ? 'linkEmail' : 'linkURL',
1✔
201
                        $parser,
202
                        [$link],
1✔
203
                );
204
        }
205

206

207
        /**
         * Adds new named reference.
         *
         * The name is passed through Texy\Helpers::toLower() and the result is
         * both written to $link->name and used as the storage key.
         */
        public function addReference(string $name, Link $link): void
        {
                $key = Texy\Helpers::toLower($name);
                $link->name = $key;
                $this->references[$key] = $link;
        }
1✔
215

216

217
        /**
         * Returns named reference.
         *
         * The lookup key is the name passed through Texy\Helpers::toLower(). When no
         * reference is stored under the full name, the part from the first '?'
         * (or, if there is no '?', from the first '#') is split off, the remainder
         * is looked up and the split-off part is appended to the URL.
         *
         * The appended part is taken from $name exactly as written, so query strings
         * and fragments keep their original letter case; only the lookup key is
         * normalized.
         *
         * @return ?Link  a clone of the stored link, or null when nothing matches
         */
        public function getReference(string $name): ?Link
        {
                $key = Texy\Helpers::toLower($name);
                if (isset($this->references[$key])) {
                        return clone $this->references[$key];
                }

                // try to extract ?... #... part
                $pos = strpos($name, '?');
                if ($pos === false) {
                        $pos = strpos($name, '#');
                }

                if ($pos === false) {
                        return null;
                }

                $baseKey = Texy\Helpers::toLower(substr($name, 0, $pos));
                if (!isset($this->references[$baseKey])) {
                        return null;
                }

                $link = clone $this->references[$baseKey];
                $link->URL .= substr($name, $pos); // suffix as written, not lower-cased
                return $link;
        }
244

245

246
        /**
         * Creates a Link for a destination written as [reference], [* image *]
         * or a plain URL.
         *
         * - "[ref]" is resolved through getReference().
         * - "[* image *]" is resolved through the image module's getReference();
         *   the link then takes the image's linkedURL (or URL) and its modifier.
         * - Anything else, or an unresolved reference, becomes a new Link that is
         *   run through checkLink().
         *
         * A '%s' in the resulting URL is replaced by the URL-encoded plain text of
         * $label; a null label is treated as an empty string.
         *
         * @param  string  $dest  destination as written in the source
         * @param  ?string  $mMod  modifier passed to the link's modifier
         * @param  ?string  $label  link label, used only for the '%s' placeholder
         */
        public function factoryLink(string $dest, ?string $mMod, ?string $label): Link
        {
                $texy = $this->texy;
                $type = Link::COMMON;
                $link = null;

                if (strlen($dest) > 1 && $dest[0] === '[') {
                        if ($dest[1] === '*') {
                                // [* image *]
                                $type = Link::IMAGE;
                                $dest = trim(substr($dest, 2, -2));
                                $image = $texy->imageModule->getReference($dest);
                                if ($image) {
                                        $link = new Link($image->linkedURL ?? $image->URL);
                                        $link->modifier = $image->modifier;
                                }
                        } else {
                                // [ref]
                                $type = Link::BRACKET;
                                $dest = substr($dest, 1, -1);
                                $link = $this->getReference($dest);
                        }
                }

                if ($link === null) {
                        // plain URL, or a reference that could not be resolved
                        $link = new Link(trim($dest));
                        $this->checkLink($link);
                }

                if (str_contains((string) $link->URL, '%s')) {
                        // $label is nullable; pass '' rather than null on to stringToText()
                        $link->URL = str_replace('%s', urlencode($texy->stringToText($label ?? '')), $link->URL);
                }

                $link->modifier->setProperties($mMod);
                $link->type = $type;
                return $link;
        }
281

282

283
        /**
284
         * Finish invocation.
285
         */
286
        public function solve(
1✔
287
                ?HandlerInvocation $invocation,
288
                Link $link,
289
                Texy\HtmlElement|string|null $content = null,
290
        ): Texy\HtmlElement|string
291
        {
292
                if ($link->URL == null) {
1✔
293
                        return $content;
1✔
294
                }
295

296
                $texy = $this->texy;
1✔
297

298
                $el = new Texy\HtmlElement('a');
1✔
299

300
                if (empty($link->modifier)) {
1✔
UNCOV
301
                        $nofollow = false;
×
302
                } else {
303
                        $nofollow = isset($link->modifier->classes['nofollow']);
1✔
304
                        unset($link->modifier->classes['nofollow']);
1✔
305
                        $el->attrs['href'] = null; // trick - move to front
1✔
306
                        $link->modifier->decorate($texy, $el);
1✔
307
                }
308

309
                if ($link->type === Link::IMAGE) {
1✔
310
                        // image
311
                        $el->attrs['href'] = Texy\Helpers::prependRoot($link->URL, $texy->imageModule->linkedRoot);
1✔
312
                        if ($this->imageClass) {
1✔
313
                                $el->attrs['class'][] = $this->imageClass;
1✔
314
                        }
315
                } else {
316
                        $el->attrs['href'] = Texy\Helpers::prependRoot($link->URL, $this->root);
1✔
317

318
                        // rel="nofollow"
319
                        if ($nofollow || ($this->forceNoFollow && str_contains($el->attrs['href'], '//'))) {
1✔
320
                                $el->attrs['rel'] = 'nofollow';
1✔
321
                        }
322
                }
323

324
                if ($content !== null) {
1✔
325
                        $el->add($content);
1✔
326
                }
327

328
                $texy->summary['links'][] = $el->attrs['href'];
1✔
329

330
                return $el;
1✔
331
        }
332

333

334
        /**
         * Finish invocation for direct URLs and e-mails: uses the protected
         * textual form of the link as its content and delegates to solve().
         */
        private function solveUrlEmail(HandlerInvocation $invocation, Link $link): Texy\HtmlElement|string
        {
                $text = $this->textualUrl($link);

                return $this->solve(
                        null,
                        $link,
                        $this->texy->protect($text, Texy\Texy::CONTENT_TEXTUAL),
                );
        }
343

344

345
        /**
         * Finish invocation for the 'newReference' event, which patternReference()
         * raises for a [reference] that has no stored definition.
         * Does nothing and returns null.
         */
        private function solveNewReference(HandlerInvocation $invocation, string $name)
        {
                return null; // no change
        }
1✔
352

353

354
        /**
         * Checks and corrects $URL.
         *
         * Strips soft hyphens, then: a URL starting with "www." gets "http://"
         * prepended; an e-mail address gets "mailto:" prepended; a URL rejected by
         * Texy::checkURL() is set to null; any other URL has "&amp;" turned
         * back into "&".
         */
        private function checkLink(Link $link): void
        {
                // remove soft hyphens; if not removed by Texy\Texy::process()
                $link->URL = str_replace("\u{AD}", '', $link->URL);

                // special supported case
                if (strncasecmp($link->URL, 'www.', 4) === 0) {
                        $link->URL = 'http://' . $link->URL;
                        return;
                }

                // email
                if (Regexp::match($link->URL, '~' . self::$EMAIL . '$~A')) {
                        $link->URL = 'mailto:' . $link->URL;
                        return;
                }

                if (!$this->texy->checkURL($link->URL, Texy\Texy::FILTER_ANCHOR)) {
                        $link->URL = null;
                        return;
                }

                // replace unwanted &amp;
                $link->URL = str_replace('&amp;', '&', $link->URL);
        }
1✔
377

378

379
        /**
         * Returns textual representation of URL.
         *
         * - With e-mail obfuscation on, an e-mail address has its '@' replaced by
         *   an entity followed by an empty HTML comment.
         * - With shortening on, http(s)/ftp/www and root-relative URLs are
         *   abbreviated: a path longer than 16 characters keeps only its last 12
         *   after "/…", and a query (or, when there is no query, a fragment)
         *   longer than 4 characters is reduced to "…".
         * - Anything else is returned as written.
         */
        private function textualUrl(Link $link): string
        {
                $raw = $link->raw;

                if ($this->texy->obfuscateEmail && Regexp::match($raw, '~^' . self::$EMAIL . '$~')) { // email
                        return str_replace('@', '&#64;<!-- -->', $raw);
                }

                if (!$this->shorten || !Regexp::match($raw, '~^(https?://|ftp://|www\.|/)~i')) {
                        return $raw;
                }

                // give "www." addresses a placeholder scheme so that the host is recognized
                $subject = strncasecmp($raw, 'www.', 4) === 0
                        ? 'none://' . $raw
                        : $raw;

                // parse_url() in PHP damages UTF-8 - use regular expression
                $parts = Regexp::match($subject, '~^
                                (?: (?P<scheme> [a-z]+ ) : )?
                                (?: // (?P<host> [^/?#]+ ) )?
                                (?P<path> (?: / | ^ ) (?! / ) [^?#]* )?
                                (?: \? (?P<query> [^#]* ) )?
                                (?: \# (?P<fragment> .* ) )?
                                $
                        ~');
                if (!$parts) {
                        return $raw;
                }

                $short = '';

                $scheme = $parts['scheme'];
                if ($scheme !== null && $scheme !== 'none') {
                        $short .= $scheme . '://';
                }

                if ($parts['host'] !== null) {
                        $short .= $parts['host'];
                }

                $path = $parts['path'];
                if ($path !== null) {
                        $short .= iconv_strlen($path, 'UTF-8') > 16
                                ? "/\u{2026}" . iconv_substr($path, -12, 12, 'UTF-8')
                                : $path;
                }

                foreach (['?' => 'query', '#' => 'fragment'] as $delimiter => $part) {
                        if ($parts[$part] > '') {
                                $short .= iconv_strlen($parts[$part], 'UTF-8') > 4
                                        ? $delimiter . "\u{2026}"
                                        : $delimiter . $parts[$part];
                                break; // the fragment is shown only when there is no query
                        }
                }

                return $short;
        }
433
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc