• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

nette / utils / 15764605036

19 Jun 2025 06:55PM UTC coverage: 92.467% (-0.05%) from 92.516%
15764605036

push

github

dg
Strings::trim() trims Line Separator and Ideographic Space (#326)

These characters occur in our data.

There are more characters listed at https://en.wikipedia.org/wiki/Whitespace_character#Unicode, maybe all of them should be included?

2050 of 2217 relevant lines covered (92.47%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.34
/src/Utils/Strings.php
1
<?php
2

3
/**
4
 * This file is part of the Nette Framework (https://nette.org)
5
 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
6
 */
7

8
declare(strict_types=1);
9

10
namespace Nette\Utils;
11

12
use JetBrains\PhpStorm\Language;
13
use Nette;
14
use function array_keys, array_map, array_shift, array_values, bin2hex, class_exists, defined, extension_loaded, function_exists, htmlspecialchars, htmlspecialchars_decode, iconv, iconv_strlen, iconv_substr, implode, in_array, is_array, is_callable, is_int, is_object, is_string, key, max, mb_convert_case, mb_strlen, mb_strtolower, mb_strtoupper, mb_substr, pack, preg_last_error, preg_last_error_msg, preg_quote, preg_replace, str_contains, str_ends_with, str_repeat, str_replace, str_starts_with, strlen, strpos, strrev, strrpos, strtolower, strtoupper, strtr, substr, trim, unpack, utf8_decode;
15
use const ENT_IGNORE, ENT_NOQUOTES, ICONV_IMPL, MB_CASE_TITLE, PHP_EOL, PREG_OFFSET_CAPTURE, PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_SPLIT_DELIM_CAPTURE, PREG_SPLIT_NO_EMPTY, PREG_SPLIT_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL;
16

17

18
/**
19
 * String tools library.
20
 */
21
class Strings
22
{
23
        use Nette\StaticClass;
24

25
        public const TrimCharacters = " \t\n\r\0\x0B\u{A0}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{200B}\u{2028}\u{3000}";
26

27
        /** @deprecated use Strings::TrimCharacters */
28
        public const TRIM_CHARACTERS = self::TrimCharacters;
29

30

31
        /**
         * Tells whether the string survives fixEncoding() unchanged.
         * @deprecated use Nette\Utils\Validators::isUnicode()
         */
        public static function checkEncoding(string $s): bool
        {
                $cleaned = self::fixEncoding($s);
                return $cleaned === $s;
        }
38

39

40
        /**
         * Strips all invalid UTF-8 characters from a string.
         */
        public static function fixEncoding(string $s): string
        {
                // the escape/unescape round trip with ENT_IGNORE removes xD800-xDFFF, x110000 and higher
                $escaped = htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8');
                return htmlspecialchars_decode($escaped, ENT_NOQUOTES);
        }
48

49

50
        /**
         * Encodes a code point (number in range 0x0000..D7FF or 0xE000..10FFFF) as a UTF-8 character.
         * @throws Nette\InvalidArgumentException if code point is not in valid range
         * @throws Nette\NotSupportedException if the iconv extension is not loaded
         */
        public static function chr(int $code): string
        {
                $isSurrogate = $code >= 0xD800 && $code <= 0xDFFF;
                if ($code < 0 || $isSurrogate || $code > 0x10FFFF) {
                        throw new Nette\InvalidArgumentException('Code point must be in range 0x0 to 0xD7FF or 0xE000 to 0x10FFFF.');
                }

                if (!extension_loaded('iconv')) {
                        throw new Nette\NotSupportedException(__METHOD__ . '() requires ICONV extension that is not loaded.');
                }

                // a code point packed as a big-endian 32-bit integer is its UTF-32BE encoding
                $utf32 = pack('N', $code);
                return iconv('UTF-32BE', 'UTF-8//IGNORE', $utf32);
        }
64

65

66
        /**
         * Decodes a UTF-8 character into its code point (number in range 0x0000..D7FF or 0xE000..10FFFF).
         * @throws Nette\NotSupportedException if the iconv extension is not loaded
         * @throws Nette\InvalidArgumentException if the conversion yields nothing (empty or invalid input)
         */
        public static function ord(string $c): int
        {
                if (!extension_loaded('iconv')) {
                        throw new Nette\NotSupportedException(__METHOD__ . '() requires ICONV extension that is not loaded.');
                }

                $utf32 = iconv('UTF-8', 'UTF-32BE//IGNORE', $c);
                if (!$utf32) {
                        $shown = $c === '' ? '' : '\x' . strtoupper(bin2hex($c));
                        throw new Nette\InvalidArgumentException('Invalid UTF-8 character "' . $shown . '".');
                }

                // the first four bytes of the UTF-32BE string are the code point
                $unpacked = unpack('N', $utf32);
                return $unpacked[1];
        }
82

83

84
        /**
         * Tells whether $haystack begins with $needle.
         * @deprecated use str_starts_with()
         */
        public static function startsWith(string $haystack, string $needle): bool
        {
                $hasPrefix = str_starts_with($haystack, $needle);
                return $hasPrefix;
        }
91

92

93
        /**
         * Tells whether $haystack finishes with $needle.
         * @deprecated use str_ends_with()
         */
        public static function endsWith(string $haystack, string $needle): bool
        {
                $hasSuffix = str_ends_with($haystack, $needle);
                return $hasSuffix;
        }
100

101

102
        /**
         * Tells whether $needle occurs anywhere in $haystack.
         * @deprecated use str_contains()
         */
        public static function contains(string $haystack, string $needle): bool
        {
                $found = str_contains($haystack, $needle);
                return $found;
        }
109

110

111
        /**
         * Extracts a part of a UTF-8 string given by starting position and length, both counted in characters.
         * A negative start counts from the end of the string.
         * @throws Nette\NotSupportedException if neither mbstring nor iconv is available
         */
        public static function substring(string $s, int $start, ?int $length = null): string
        {
                if (function_exists('mb_substr')) {
                        return mb_substr($s, $start, $length, 'UTF-8'); // MB is much faster
                }

                if (!extension_loaded('iconv')) {
                        throw new Nette\NotSupportedException(__METHOD__ . '() requires extension ICONV or MBSTRING, neither is loaded.');
                }

                if ($length === null) {
                        $length = self::length($s);
                } elseif ($start < 0 && $length < 0) {
                        $start += self::length($s); // unifies iconv_substr behavior with mb_substr
                }

                return iconv_substr($s, $start, $length, 'UTF-8');
        }
129

130

131
        /**
         * Cleans up a text: converts UTF-8 to the NFC normal form, unifies line breaks to `\n`,
         * removes control characters, trims trailing spaces on every line and drops leading
         * and trailing blank lines.
         */
        public static function normalize(string $s): string
        {
                // convert to compressed normal form (NFC); the input is kept when normalization fails
                if (class_exists('Normalizer', false)) {
                        $composed = \Normalizer::normalize($s, \Normalizer::FORM_C);
                        if ($composed !== false) {
                                $s = $composed;
                        }
                }

                $s = self::unixNewLines($s);

                // remove control characters; leave \t + \n
                $s = self::pcre('preg_replace', ['#[\x00-\x08\x0B-\x1F\x7F-\x9F]+#u', '', $s]);

                // right trim of every line
                $s = self::pcre('preg_replace', ['#[\t ]+$#m', '', $s]);

                // leading and trailing blank lines
                return trim($s, "\n");
        }
155

156

157
        /**
         * Alias of unixNewLines().
         * @deprecated use Strings::unixNewLines()
         */
        public static function normalizeNewLines(string $s): string
        {
                $converted = self::unixNewLines($s);
                return $converted;
        }
162

163

164
        /**
         * Rewrites line endings to \n as used on Unix-like systems.
         * Recognized line endings: \n, \r, \r\n, U+2028 line separator, U+2029 paragraph separator.
         */
        public static function unixNewLines(string $s): string
        {
                $lineBreak = "~\r\n?|\u{2028}|\u{2029}~";
                return preg_replace($lineBreak, "\n", $s);
        }
172

173

174
        /**
         * Rewrites line endings to the platform-specific PHP_EOL, i.e. \r\n on Windows and \n elsewhere.
         * Recognized line endings: \n, \r, \r\n, U+2028 line separator, U+2029 paragraph separator.
         */
        public static function platformNewLines(string $s): string
        {
                $lineBreak = "~\r\n?|\n|\u{2028}|\u{2029}~";
                return preg_replace($lineBreak, PHP_EOL, $s);
        }
182

183

184
        /**
         * Transliterates a UTF-8 string to ASCII, i.e. removes diacritics etc.
         * Uses the intl Transliterator when its class is loaded (a notice is triggered once otherwise)
         * and iconv, whose behavior depends on the implementation reported by ICONV_IMPL.
         */
        public static function toAscii(string $s): string
        {
                $iconvImpl = defined('ICONV_IMPL') ? trim(ICONV_IMPL, '"\'') : null;

                // the transliterator is created once and reused; false means "intl is not available"
                static $transliterator = null;
                if ($transliterator === null) {
                        if (class_exists('Transliterator', false)) {
                                $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
                        } else {
                                trigger_error(__METHOD__ . "(): it is recommended to enable PHP extensions 'intl'.", E_USER_NOTICE);
                                $transliterator = false;
                        }
                }

                $stripNonAscii = static fn(string $text) => self::pcre('preg_replace', ['#[^\x00-\x7F]++#', '', $text]);

                // remove control characters and check UTF-8 validity
                $s = self::pcre('preg_replace', ['#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s]);

                // transliteration (by Transliterator and iconv) is not optimal, replace some characters directly:
                // typographic quotes, degree sign, Cyrillic Ya/Yu and German umlauts with sharp s
                $s = strtr($s, [
                        "\u{201E}" => '"',
                        "\u{201C}" => '"',
                        "\u{201D}" => '"',
                        "\u{201A}" => "'",
                        "\u{2018}" => "'",
                        "\u{2019}" => "'",
                        "\u{B0}" => '^',
                        "\u{42F}" => 'Ya',
                        "\u{44F}" => 'ya',
                        "\u{42E}" => 'Yu',
                        "\u{44E}" => 'yu',
                        "\u{c4}" => 'Ae',
                        "\u{d6}" => 'Oe',
                        "\u{dc}" => 'Ue',
                        "\u{1e9e}" => 'Ss',
                        "\u{e4}" => 'ae',
                        "\u{f6}" => 'oe',
                        "\u{fc}" => 'ue',
                        "\u{df}" => 'ss',
                ]);

                if ($iconvImpl !== 'libiconv') {
                        // symbols, superscripts, currency signs and arrows
                        $s = strtr($s, [
                                "\u{AE}" => '(R)',
                                "\u{A9}" => '(c)',
                                "\u{2026}" => '...',
                                "\u{AB}" => '<<',
                                "\u{BB}" => '>>',
                                "\u{A3}" => 'lb',
                                "\u{A5}" => 'yen',
                                "\u{B2}" => '^2',
                                "\u{B3}" => '^3',
                                "\u{B5}" => 'u',
                                "\u{B9}" => '^1',
                                "\u{BA}" => 'o',
                                "\u{BF}" => '?',
                                "\u{2CA}" => "'",
                                "\u{2CD}" => '_',
                                "\u{2DD}" => '"',
                                "\u{1FEF}" => '',
                                "\u{20AC}" => 'EUR',
                                "\u{2122}" => 'TM',
                                "\u{212E}" => 'e',
                                "\u{2190}" => '<-',
                                "\u{2191}" => '^',
                                "\u{2192}" => '->',
                                "\u{2193}" => 'V',
                                "\u{2194}" => '<->',
                        ]);
                }

                if ($transliterator) {
                        $s = $transliterator->transliterate($s);

                        // iconv follows because the transliterator leaves some characters out of ASCII
                        if ($iconvImpl === 'glibc') {
                                $s = strtr($s, '?', "\x01"); // temporarily hide ? to distinguish them from the garbage that iconv creates
                                $s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
                                return str_replace(['?', "\x01"], ['', '?'], $s); // remove garbage and restore ? characters
                        }

                        if ($iconvImpl === 'libiconv') {
                                return iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
                        }

                        return $stripNonAscii($s); // null or 'unknown' (#216)
                }

                if (!in_array($iconvImpl, ['glibc', 'libiconv'], true)) {
                        return $stripNonAscii($s);
                }

                // temporarily hide these characters to distinguish them from the garbage that iconv creates
                $s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");

                if ($iconvImpl === 'glibc') {
                        // glibc implementation is very limited. transliterate into Windows-1250 and then into ASCII, so most Eastern European characters are preserved
                        $s = iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
                        $s = strtr(
                                $s,
                                "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
                                'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.',
                        );
                        $s = $stripNonAscii($s);
                } else {
                        $s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
                }

                // remove garbage that iconv creates during transliteration (eg Ý -> Y')
                $s = str_replace(['`', "'", '"', '^', '~', '?'], '', $s);

                // restore temporarily hidden characters
                return strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
        }
247

248

249
        /**
         * Turns a UTF-8 string into the form used in URLs: removes diacritics and replaces every run of characters
         * other than letters of the English alphabet, digits and those listed in $charlist with a hyphen.
         * Leading and trailing hyphens are trimmed; the result is lower-cased unless $lower is false.
         */
        public static function webalize(string $s, ?string $charlist = null, bool $lower = true): string
        {
                $s = self::toAscii($s);
                if ($lower) {
                        $s = strtolower($s);
                }

                $extraAllowed = $charlist !== null ? preg_quote($charlist, '#') : '';
                $s = self::pcre('preg_replace', ['#[^a-z0-9' . $extraAllowed . ']+#i', '-', $s]);
                return trim($s, '-');
        }
264

265

266
        /**
         * Shortens a UTF-8 string to at most $maxLen characters while trying not to split words.
         * The $append string (an ellipsis by default) is added only when the string was actually shortened
         * and counts towards the maximal length.
         */
        public static function truncate(string $s, int $maxLen, string $append = "\u{2026}"): string
        {
                if (self::length($s) <= $maxLen) {
                        return $s;
                }

                $maxLen -= self::length($append);
                if ($maxLen < 1) {
                        return $append;
                }

                // longest prefix that is followed by whitespace or ASCII punctuation
                $matches = self::match($s, '#^.{1,' . $maxLen . '}(?=[\s\x00-/:-@\[-`{-~])#us');
                if ($matches) {
                        return $matches[0] . $append;
                }

                return self::substring($s, 0, $maxLen) . $append;
        }
287

288

289
        /**
         * Indents every non-empty line of a multiline text from the left. $level says how many times
         * the indentation string $chars (*tab* by default) is repeated.
         */
        public static function indent(string $s, int $level = 1, string $chars = "\t"): string
        {
                if ($level <= 0) {
                        return $s;
                }

                $indentation = str_repeat($chars, $level);
                return self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . $indentation);
        }
301

302

303
        /**
         * Returns the UTF-8 string with all characters converted to lower case.
         */
        public static function lower(string $s): string
        {
                $lowered = mb_strtolower($s, 'UTF-8');
                return $lowered;
        }
310

311

312
        /**
         * Lower-cases the first character of a UTF-8 string and keeps the rest as it is.
         */
        public static function firstLower(string $s): string
        {
                $head = self::substring($s, 0, 1);
                $tail = self::substring($s, 1);
                return self::lower($head) . $tail;
        }
319

320

321
        /**
         * Returns the UTF-8 string with all characters converted to upper case.
         */
        public static function upper(string $s): string
        {
                $raised = mb_strtoupper($s, 'UTF-8');
                return $raised;
        }
328

329

330
        /**
         * Upper-cases the first character of a UTF-8 string and keeps the rest as it is.
         */
        public static function firstUpper(string $s): string
        {
                $head = self::substring($s, 0, 1);
                $tail = self::substring($s, 1);
                return self::upper($head) . $tail;
        }
337

338

339
        /**
         * Converts a UTF-8 string to title case: the first character of every word becomes upper case,
         * the others lower case.
         */
        public static function capitalize(string $s): string
        {
                $titled = mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
                return $titled;
        }
346

347

348
        /**
         * Compares two UTF-8 strings or their parts, without taking character case into account.
         * If $length is null, whole strings are compared; if it is negative, the corresponding number
         * of characters from the end of the strings is compared; otherwise the appropriate number
         * of characters from the beginning is compared.
         */
        public static function compare(string $left, string $right, ?int $length = null): bool
        {
                if (class_exists('Normalizer', false)) {
                        // form NFD is faster; Normalizer::normalize() returns false for malformed UTF-8,
                        // in which case the operand is compared as given instead of passing false on
                        $normalizedLeft = \Normalizer::normalize($left, \Normalizer::FORM_D);
                        if ($normalizedLeft !== false) {
                                $left = $normalizedLeft;
                        }

                        $normalizedRight = \Normalizer::normalize($right, \Normalizer::FORM_D);
                        if ($normalizedRight !== false) {
                                $right = $normalizedRight;
                        }
                }

                if ($length < 0) {
                        // null is not less than zero, so this branch is taken for negative integers only
                        $left = self::substring($left, $length, -$length);
                        $right = self::substring($right, $length, -$length);
                } elseif ($length !== null) {
                        $left = self::substring($left, 0, $length);
                        $right = self::substring($right, 0, $length);
                }

                return self::lower($left) === self::lower($right);
        }
370

371

372
        /**
         * Finds the common prefix of strings or returns empty string if the prefix was not found.
         * An empty list of strings has an empty prefix. The prefix never ends in the middle
         * of a multibyte UTF-8 character.
         * @param  string[]  $strings
         */
        public static function findPrefix(array $strings): string
        {
                if ($strings === []) {
                        return ''; // array_shift() would yield null, which is not a string
                }

                $first = array_shift($strings);
                $firstLen = strlen($first);
                for ($i = 0; $i < $firstLen; $i++) {
                        foreach ($strings as $s) {
                                if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
                                        // the strings differ at byte $i; step back while standing on a UTF-8
                                        // continuation byte so that the prefix does not cut a character in half
                                        while ($i && $first[$i - 1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
                                                $i--;
                                        }

                                        return substr($first, 0, $i);
                                }
                        }
                }

                return $first;
        }
393

394

395
        /**
         * Counts characters (not bytes) in a UTF-8 string.
         * The result is the number of Unicode code points, which may differ from the number of graphemes.
         */
        public static function length(string $s): int
        {
                if (extension_loaded('mbstring')) {
                        return mb_strlen($s, 'UTF-8');
                }

                if (extension_loaded('iconv')) {
                        return iconv_strlen($s, 'UTF-8');
                }

                return strlen(@utf8_decode($s)); // deprecated
        }
407

408

409
        /**
         * Strips spaces (or the characters passed as second argument) from both ends of a UTF-8 encoded string.
         */
        public static function trim(string $s, string $charlist = self::TrimCharacters): string
        {
                // one or more of the listed characters, escaped for use inside a #-delimited pattern
                $run = '[' . preg_quote($charlist, '#') . ']+';
                return self::replace($s, '#^' . $run . '|' . $run . '$#Du', '');
        }
417

418

419
        /**
         * Pads a UTF-8 string to the given length in characters by prepending the $pad string,
         * repeated and cut as needed. Strings that are already long enough are returned unchanged.
         * @param  non-empty-string  $pad
         */
        public static function padLeft(string $s, int $length, string $pad = ' '): string
        {
                $missing = max(0, $length - self::length($s));
                $padLen = self::length($pad);
                $wholeRepeats = (int) ($missing / $padLen);
                $partial = self::substring($pad, 0, $missing % $padLen);
                return str_repeat($pad, $wholeRepeats) . $partial . $s;
        }
429

430

431
        /**
         * Pads a UTF-8 string to the given length in characters by appending the $pad string,
         * repeated and cut as needed. Strings that are already long enough are returned unchanged.
         * @param  non-empty-string  $pad
         */
        public static function padRight(string $s, int $length, string $pad = ' '): string
        {
                $missing = max(0, $length - self::length($s));
                $padLen = self::length($pad);
                $wholeRepeats = (int) ($missing / $padLen);
                $partial = self::substring($pad, 0, $missing % $padLen);
                return $s . str_repeat($pad, $wholeRepeats) . $partial;
        }
441

442

443
        /**
         * Returns the UTF-8 string with its characters in reverse order.
         * @throws Nette\NotSupportedException if the iconv extension is not loaded
         */
        public static function reverse(string $s): string
        {
                if (!extension_loaded('iconv')) {
                        throw new Nette\NotSupportedException(__METHOD__ . '() requires ICONV extension that is not loaded.');
                }

                // reversing the bytes of big-endian UTF-32 gives the reversed text in little-endian UTF-32
                $utf32 = iconv('UTF-8', 'UTF-32BE', $s);
                $reversed = strrev($utf32);
                return iconv('UTF-32LE', 'UTF-8', $reversed);
        }
454

455

456
        /**
         * Returns the part of $haystack before the $nth occurrence of $needle, or null if the needle was not found.
         * A negative $nth searches from the end.
         */
        public static function before(string $haystack, string $needle, int $nth = 1): ?string
        {
                $pos = self::pos($haystack, $needle, $nth);
                if ($pos === null) {
                        return null;
                }

                return substr($haystack, 0, $pos);
        }
467

468

469
        /**
         * Returns the part of $haystack after the $nth occurrence of $needle, or null if the needle was not found.
         * A negative $nth searches from the end.
         */
        public static function after(string $haystack, string $needle, int $nth = 1): ?string
        {
                $pos = self::pos($haystack, $needle, $nth);
                if ($pos === null) {
                        return null;
                }

                $afterNeedle = $pos + strlen($needle);
                return substr($haystack, $afterNeedle);
        }
480

481

482
        /**
         * Returns the position in characters of the $nth occurrence of $needle in $haystack,
         * or null if the $needle was not found. A negative `$nth` searches from the end.
         */
        public static function indexOf(string $haystack, string $needle, int $nth = 1): ?int
        {
                $pos = self::pos($haystack, $needle, $nth);
                if ($pos === null) {
                        return null;
                }

                // $pos is a byte offset; the character position is the length of the text preceding it
                $preceding = substr($haystack, 0, $pos);
                return self::length($preceding);
        }
493

494

495
        /**
         * Returns the position in bytes of the $nth occurrence of $needle in $haystack, or null if
         * the needle was not found. A positive $nth counts occurrences from the beginning, a negative one
         * from the end; zero never matches. An empty needle is found at the beginning
         * (positive $nth) or at the end (negative $nth) of the haystack.
         */
        private static function pos(string $haystack, string $needle, int $nth = 1): ?int
        {
                if (!$nth) {
                        return null;
                } elseif ($nth > 0) {
                        if ($needle === '') {
                                return 0;
                        }

                        $pos = 0;
                        while (($pos = strpos($haystack, $needle, $pos)) !== false && --$nth) {
                                $pos++; // continue the search right behind the start of this occurrence
                        }
                } else {
                        $len = strlen($haystack);
                        if ($needle === '') {
                                return $len;
                        } elseif ($len === 0) {
                                return null;
                        }

                        // the negative offset ($pos - $len) limits strrpos() to matches starting at or before $pos
                        $pos = $len - 1;
                        while (($pos = strrpos($haystack, $needle, $pos - $len)) !== false && ++$nth) {
                                if ($pos === 0) {
                                        // nothing precedes byte 0; the next offset would lie outside
                                        // the haystack and strrpos() would throw ValueError
                                        return null;
                                }

                                $pos--; // continue the search before the start of this occurrence
                        }
                }

                return Helpers::falseToNull($pos);
        }
527

528

529
        /**
         * Splits the string into an array by the regular expression. Expressions in parentheses
         * are captured and returned as well.
         */
        public static function split(
                string $subject,
                #[Language('RegExp')]
                string $pattern,
                bool|int $captureOffset = false,
                bool $skipEmpty = false,
                int $limit = -1,
                bool $utf8 = false,
        ): array
        {
                if (is_int($captureOffset)) {
                        $flags = $captureOffset; // back compatibility: raw PREG_SPLIT_* flags
                } else {
                        $flags = ($captureOffset ? PREG_SPLIT_OFFSET_CAPTURE : 0) | ($skipEmpty ? PREG_SPLIT_NO_EMPTY : 0);
                }

                if ($utf8) {
                        $pattern .= 'u';
                }

                $parts = self::pcre('preg_split', [$pattern, $subject, $limit, $flags | PREG_SPLIT_DELIM_CAPTURE]);
                if ($utf8 && $captureOffset) {
                        return self::bytesToChars($subject, [$parts])[0];
                }

                return $parts;
        }
552

553

554
        /**
         * Searches the string for the first part matching the regular expression and returns
         * an array with the found expression and individual subexpressions, or `null` when nothing matches.
         * With $utf8 enabled, $offset is given in characters instead of bytes.
         */
        public static function match(
                string $subject,
                #[Language('RegExp')]
                string $pattern,
                bool|int $captureOffset = false,
                int $offset = 0,
                bool $unmatchedAsNull = false,
                bool $utf8 = false,
        ): ?array
        {
                if (is_int($captureOffset)) {
                        $flags = $captureOffset; // back compatibility: raw PREG_* flags
                } else {
                        $flags = ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0);
                }

                if ($utf8) {
                        // translate the character offset into a byte offset
                        $offset = strlen(self::substring($subject, 0, $offset));
                        $pattern .= 'u';
                }

                if ($offset > strlen($subject)) {
                        return null;
                }

                $found = self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset]);
                if (!$found) {
                        return null;
                }

                return $utf8 && $captureOffset
                        ? self::bytesToChars($subject, [$m])[0]
                        : $m;
        }
587

588

589
        /**
         * Searches the string for all occurrences matching the regular expression and
         * returns an array of arrays containing the found expression and each subexpression.
         * In lazy mode a generator producing one match at a time is returned instead.
         * With $utf8 enabled, $offset is given in characters instead of bytes.
         * @return ($lazy is true ? \Generator<int, array> : array[])
         */
        public static function matchAll(
                string $subject,
                #[Language('RegExp')]
                string $pattern,
                bool|int $captureOffset = false,
                int $offset = 0,
                bool $unmatchedAsNull = false,
                bool $patternOrder = false,
                bool $utf8 = false,
                bool $lazy = false,
        ): array|\Generator
        {
                if ($utf8) {
                        // translate the character offset into a byte offset
                        $offset = strlen(self::substring($subject, 0, $offset));
                        $pattern .= 'u';
                }

                if ($lazy) {
                        // offsets are always captured here, they drive the iteration
                        $flags = PREG_OFFSET_CAPTURE | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0);
                        $generate = function () use ($utf8, $captureOffset, $flags, $subject, $pattern, $offset) {
                                $subjectLen = strlen($subject);
                                for ($index = 0; ; $index++) {
                                        // after the first match the search may not start at the very end of the subject
                                        $lastStart = $index ? $subjectLen - 1 : $subjectLen;
                                        if ($offset > $lastStart) {
                                                break;
                                        }

                                        if (!self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset])) {
                                                break;
                                        }

                                        // continue behind this match; an empty match advances by one byte
                                        $offset = $m[0][1] + max(1, strlen($m[0][0]));
                                        if (!$captureOffset) {
                                                $m = array_map(fn($item) => $item[0], $m);
                                        } elseif ($utf8) {
                                                $m = self::bytesToChars($subject, [$m])[0];
                                        }

                                        yield $index => $m;
                                }
                        };

                        return $generate();
                }

                if ($offset > strlen($subject)) {
                        return [];
                }

                if (is_int($captureOffset)) {
                        $flags = $captureOffset; // back compatibility: raw PREG_* flags
                } else {
                        $flags = ($captureOffset ? PREG_OFFSET_CAPTURE : 0)
                                | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0)
                                | ($patternOrder ? PREG_PATTERN_ORDER : 0);
                }

                // set order is the default unless pattern order was requested
                $order = ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER);
                self::pcre('preg_match_all', [$pattern, $subject, &$m, $order, $offset]);

                if ($utf8 && $captureOffset) {
                        return self::bytesToChars($subject, $m);
                }

                return $m;
        }
647

648

649
        /**
         * Replaces all occurrences matching regular expression $pattern which can be string or array
         * in the form `pattern => replacement`. The replacement is either a string, or a callback
         * given as an object or array callable that receives the match and returns the replacement text.
         * With $utf8 enabled, the `u` modifier is appended to every pattern.
         * @throws Nette\InvalidStateException if the object/array replacement is not callable
         */
        public static function replace(
                string $subject,
                #[Language('RegExp')]
                string|array $pattern,
                string|callable $replacement = '',
                int $limit = -1,
                bool $captureOffset = false,
                bool $unmatchedAsNull = false,
                bool $utf8 = false,
        ): string
        {
                if (is_object($replacement) || is_array($replacement)) {
                        if (!is_callable($replacement, false, $textual)) {
                                throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
                        }

                        $flags = ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0);
                        if ($utf8) {
                                // $pattern may be a list of patterns; the modifier goes to each of them
                                // (appending to the array itself would turn it into the string "Arrayu")
                                $pattern = is_array($pattern)
                                        ? array_map(fn($item) => $item . 'u', $pattern)
                                        : $pattern . 'u';
                                if ($captureOffset) {
                                        // wrap the callback so that it receives the match converted by bytesToChars()
                                        $replacement = fn($m) => $replacement(self::bytesToChars($subject, [$m])[0]);
                                }
                        }

                        return self::pcre('preg_replace_callback', [$pattern, $replacement, $subject, $limit, 0, $flags]);

                } elseif (is_array($pattern) && is_string(key($pattern))) {
                        // `pattern => replacement` map: split it into parallel lists
                        $replacement = array_values($pattern);
                        $pattern = array_keys($pattern);
                }

                if ($utf8) {
                        $pattern = array_map(fn($item) => $item . 'u', (array) $pattern);
                }

                return self::pcre('preg_replace', [$pattern, $replacement, $subject, $limit]);
        }
689

690

691
        /**
         * Converts the byte offsets in PREG_OFFSET_CAPTURE results to character offsets
         * (characters are counted by self::length()).
         *
         * Offsets are converted incrementally: only the slice between the previously seen
         * byte offset and the current one is measured, in either direction.
         *
         * @param  string  $s  subject the offsets refer to
         * @param  array  $groups  list of match sets, every item of a set being a [text, byteOffset] pair
         * @return array  the same structure with each non-negative byteOffset replaced by a character offset
         */
        private static function bytesToChars(string $s, array $groups): array
        {
                $lastBytes = $lastChars = 0;
                foreach ($groups as &$matches) {
                        foreach ($matches as &$match) {
                                if ($match[1] < 0) {
                                        // group did not participate in the match: PCRE reports offset -1,
                                        // which is not a position in $s and must stay as it is
                                        continue;
                                }

                                if ($match[1] > $lastBytes) {
                                        $lastChars += self::length(substr($s, $lastBytes, $match[1] - $lastBytes));
                                } elseif ($match[1] < $lastBytes) {
                                        $lastChars -= self::length(substr($s, $match[1], $lastBytes - $match[1]));
                                }

                                $lastBytes = $match[1];
                                $match[1] = $lastChars;
                        }

                        unset($match); // drop the reference before the next set is iterated
                }

                unset($matches);

                return $groups;
        }
709

710

711
        /**
         * Calls a preg_* function through Callback::invokeSafe() and reports PCRE failures
         * as RegexpException.
         *
         * @param  string  $func  name of the function to call
         * @param  array  $args  its arguments; $args[0] is the pattern (or patterns) quoted in error messages
         * @return mixed  result of the called function
         * @throws RegexpException
         * @internal
         */
        public static function pcre(string $func, array $args)
        {
                $onError = function (string $message) use ($args): void {
                        // compile-time error, not detectable by preg_last_error
                        throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
                };
                $result = Callback::invokeSafe($func, $args, $onError);

                $errorCode = preg_last_error();
                if (!$errorCode) {
                        return $result;
                }

                // run-time error, but preg_last_error & return code are liars:
                // for the replacing functions the error is honoured only when the result is null
                $isReplacing = in_array($func, ['preg_filter', 'preg_replace_callback', 'preg_replace'], true);
                if ($isReplacing && $result !== null) {
                        return $result;
                }

                throw new RegexpException(
                        preg_last_error_msg() . ' (pattern: ' . implode(' or ', (array) $args[0]) . ')',
                        $errorCode,
                );
        }
728
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc