• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PHPOffice / PhpSpreadsheet / 19842895431

02 Dec 2025 12:58AM UTC coverage: 95.963% (+0.002%) from 95.961%
19842895431

Pull #4726

github

web-flow
Merge 98ca50e9f into fd923c93c
Pull Request #4726: Slightly Better Support for Escaped Characters in Xlsx Reader/Writer

26 of 27 new or added lines in 1 file covered. (96.3%)

2 existing lines in 1 file now uncovered.

45524 of 47439 relevant lines covered (95.96%)

374.47 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.45
/src/PhpSpreadsheet/Shared/StringHelper.php
1
<?php
2

3
namespace PhpOffice\PhpSpreadsheet\Shared;
4

5
use Composer\Pcre\Preg;
6
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
7
use PhpOffice\PhpSpreadsheet\Exception as SpreadsheetException;
8
use Stringable;
9

10
class StringHelper
11
{
12
    private const CONTROL_CHARACTERS_KEYS = [
13
        "\x00",
14
        "\x01",
15
        "\x02",
16
        "\x03",
17
        "\x04",
18
        "\x05",
19
        "\x06",
20
        "\x07",
21
        "\x08",
22
        "\x0b",
23
        "\x0c",
24
        "\x0e",
25
        "\x0f",
26
        "\x10",
27
        "\x11",
28
        "\x12",
29
        "\x13",
30
        "\x14",
31
        "\x15",
32
        "\x16",
33
        "\x17",
34
        "\x18",
35
        "\x19",
36
        "\x1a",
37
        "\x1b",
38
        "\x1c",
39
        "\x1d",
40
        "\x1e",
41
        "\x1f",
42
    ];
43
    private const CONTROL_CHARACTERS_VALUES = [
44
        '_x0000_',
45
        '_x0001_',
46
        '_x0002_',
47
        '_x0003_',
48
        '_x0004_',
49
        '_x0005_',
50
        '_x0006_',
51
        '_x0007_',
52
        '_x0008_',
53
        '_x000B_',
54
        '_x000C_',
55
        '_x000E_',
56
        '_x000F_',
57
        '_x0010_',
58
        '_x0011_',
59
        '_x0012_',
60
        '_x0013_',
61
        '_x0014_',
62
        '_x0015_',
63
        '_x0016_',
64
        '_x0017_',
65
        '_x0018_',
66
        '_x0019_',
67
        '_x001A_',
68
        '_x001B_',
69
        '_x001C_',
70
        '_x001D_',
71
        '_x001E_',
72
        '_x001F_',
73
    ];
74

75
    /**
76
     * SYLK Characters array.
77
     */
78
    private const SYLK_CHARACTERS = [
79
        "\x1B 0" => "\x00",
80
        "\x1B 1" => "\x01",
81
        "\x1B 2" => "\x02",
82
        "\x1B 3" => "\x03",
83
        "\x1B 4" => "\x04",
84
        "\x1B 5" => "\x05",
85
        "\x1B 6" => "\x06",
86
        "\x1B 7" => "\x07",
87
        "\x1B 8" => "\x08",
88
        "\x1B 9" => "\x09",
89
        "\x1B :" => "\x0a",
90
        "\x1B ;" => "\x0b",
91
        "\x1B <" => "\x0c",
92
        "\x1B =" => "\x0d",
93
        "\x1B >" => "\x0e",
94
        "\x1B ?" => "\x0f",
95
        "\x1B!0" => "\x10",
96
        "\x1B!1" => "\x11",
97
        "\x1B!2" => "\x12",
98
        "\x1B!3" => "\x13",
99
        "\x1B!4" => "\x14",
100
        "\x1B!5" => "\x15",
101
        "\x1B!6" => "\x16",
102
        "\x1B!7" => "\x17",
103
        "\x1B!8" => "\x18",
104
        "\x1B!9" => "\x19",
105
        "\x1B!:" => "\x1a",
106
        "\x1B!;" => "\x1b",
107
        "\x1B!<" => "\x1c",
108
        "\x1B!=" => "\x1d",
109
        "\x1B!>" => "\x1e",
110
        "\x1B!?" => "\x1f",
111
        "\x1B'?" => "\x7f",
112
        "\x1B(0" => '€', // 128 in CP1252
113
        "\x1B(2" => '‚', // 130 in CP1252
114
        "\x1B(3" => 'ƒ', // 131 in CP1252
115
        "\x1B(4" => '„', // 132 in CP1252
116
        "\x1B(5" => '…', // 133 in CP1252
117
        "\x1B(6" => '†', // 134 in CP1252
118
        "\x1B(7" => '‡', // 135 in CP1252
119
        "\x1B(8" => 'ˆ', // 136 in CP1252
120
        "\x1B(9" => '‰', // 137 in CP1252
121
        "\x1B(:" => 'Š', // 138 in CP1252
122
        "\x1B(;" => '‹', // 139 in CP1252
123
        "\x1BNj" => 'Œ', // 140 in CP1252
124
        "\x1B(>" => 'Ž', // 142 in CP1252
125
        "\x1B)1" => '‘', // 145 in CP1252
126
        "\x1B)2" => '’', // 146 in CP1252
127
        "\x1B)3" => '“', // 147 in CP1252
128
        "\x1B)4" => '”', // 148 in CP1252
129
        "\x1B)5" => '•', // 149 in CP1252
130
        "\x1B)6" => '–', // 150 in CP1252
131
        "\x1B)7" => '—', // 151 in CP1252
132
        "\x1B)8" => '˜', // 152 in CP1252
133
        "\x1B)9" => '™', // 153 in CP1252
134
        "\x1B):" => 'š', // 154 in CP1252
135
        "\x1B);" => '›', // 155 in CP1252
136
        "\x1BNz" => 'œ', // 156 in CP1252
137
        "\x1B)>" => 'ž', // 158 in CP1252
138
        "\x1B)?" => 'Ÿ', // 159 in CP1252
139
        "\x1B*0" => ' ', // 160 in CP1252
140
        "\x1BN!" => '¡', // 161 in CP1252
141
        "\x1BN\"" => '¢', // 162 in CP1252
142
        "\x1BN#" => '£', // 163 in CP1252
143
        "\x1BN(" => '¤', // 164 in CP1252
144
        "\x1BN%" => '¥', // 165 in CP1252
145
        "\x1B*6" => '¦', // 166 in CP1252
146
        "\x1BN'" => '§', // 167 in CP1252
147
        "\x1BNH " => '¨', // 168 in CP1252
148
        "\x1BNS" => '©', // 169 in CP1252
149
        "\x1BNc" => 'ª', // 170 in CP1252
150
        "\x1BN+" => '«', // 171 in CP1252
151
        "\x1B*<" => '¬', // 172 in CP1252
152
        "\x1B*=" => '­', // 173 in CP1252
153
        "\x1BNR" => '®', // 174 in CP1252
154
        "\x1B*?" => '¯', // 175 in CP1252
155
        "\x1BN0" => '°', // 176 in CP1252
156
        "\x1BN1" => '±', // 177 in CP1252
157
        "\x1BN2" => '²', // 178 in CP1252
158
        "\x1BN3" => '³', // 179 in CP1252
159
        "\x1BNB " => '´', // 180 in CP1252
160
        "\x1BN5" => 'µ', // 181 in CP1252
161
        "\x1BN6" => '¶', // 182 in CP1252
162
        "\x1BN7" => '·', // 183 in CP1252
163
        "\x1B+8" => '¸', // 184 in CP1252
164
        "\x1BNQ" => '¹', // 185 in CP1252
165
        "\x1BNk" => 'º', // 186 in CP1252
166
        "\x1BN;" => '»', // 187 in CP1252
167
        "\x1BN<" => '¼', // 188 in CP1252
168
        "\x1BN=" => '½', // 189 in CP1252
169
        "\x1BN>" => '¾', // 190 in CP1252
170
        "\x1BN?" => '¿', // 191 in CP1252
171
        "\x1BNAA" => 'À', // 192 in CP1252
172
        "\x1BNBA" => 'Á', // 193 in CP1252
173
        "\x1BNCA" => 'Â', // 194 in CP1252
174
        "\x1BNDA" => 'Ã', // 195 in CP1252
175
        "\x1BNHA" => 'Ä', // 196 in CP1252
176
        "\x1BNJA" => 'Å', // 197 in CP1252
177
        "\x1BNa" => 'Æ', // 198 in CP1252
178
        "\x1BNKC" => 'Ç', // 199 in CP1252
179
        "\x1BNAE" => 'È', // 200 in CP1252
180
        "\x1BNBE" => 'É', // 201 in CP1252
181
        "\x1BNCE" => 'Ê', // 202 in CP1252
182
        "\x1BNHE" => 'Ë', // 203 in CP1252
183
        "\x1BNAI" => 'Ì', // 204 in CP1252
184
        "\x1BNBI" => 'Í', // 205 in CP1252
185
        "\x1BNCI" => 'Î', // 206 in CP1252
186
        "\x1BNHI" => 'Ï', // 207 in CP1252
187
        "\x1BNb" => 'Ð', // 208 in CP1252
188
        "\x1BNDN" => 'Ñ', // 209 in CP1252
189
        "\x1BNAO" => 'Ò', // 210 in CP1252
190
        "\x1BNBO" => 'Ó', // 211 in CP1252
191
        "\x1BNCO" => 'Ô', // 212 in CP1252
192
        "\x1BNDO" => 'Õ', // 213 in CP1252
193
        "\x1BNHO" => 'Ö', // 214 in CP1252
194
        "\x1B-7" => '×', // 215 in CP1252
195
        "\x1BNi" => 'Ø', // 216 in CP1252
196
        "\x1BNAU" => 'Ù', // 217 in CP1252
197
        "\x1BNBU" => 'Ú', // 218 in CP1252
198
        "\x1BNCU" => 'Û', // 219 in CP1252
199
        "\x1BNHU" => 'Ü', // 220 in CP1252
200
        "\x1B-=" => 'Ý', // 221 in CP1252
201
        "\x1BNl" => 'Þ', // 222 in CP1252
202
        "\x1BN{" => 'ß', // 223 in CP1252
203
        "\x1BNAa" => 'à', // 224 in CP1252
204
        "\x1BNBa" => 'á', // 225 in CP1252
205
        "\x1BNCa" => 'â', // 226 in CP1252
206
        "\x1BNDa" => 'ã', // 227 in CP1252
207
        "\x1BNHa" => 'ä', // 228 in CP1252
208
        "\x1BNJa" => 'å', // 229 in CP1252
209
        "\x1BNq" => 'æ', // 230 in CP1252
210
        "\x1BNKc" => 'ç', // 231 in CP1252
211
        "\x1BNAe" => 'è', // 232 in CP1252
212
        "\x1BNBe" => 'é', // 233 in CP1252
213
        "\x1BNCe" => 'ê', // 234 in CP1252
214
        "\x1BNHe" => 'ë', // 235 in CP1252
215
        "\x1BNAi" => 'ì', // 236 in CP1252
216
        "\x1BNBi" => 'í', // 237 in CP1252
217
        "\x1BNCi" => 'î', // 238 in CP1252
218
        "\x1BNHi" => 'ï', // 239 in CP1252
219
        "\x1BNs" => 'ð', // 240 in CP1252
220
        "\x1BNDn" => 'ñ', // 241 in CP1252
221
        "\x1BNAo" => 'ò', // 242 in CP1252
222
        "\x1BNBo" => 'ó', // 243 in CP1252
223
        "\x1BNCo" => 'ô', // 244 in CP1252
224
        "\x1BNDo" => 'õ', // 245 in CP1252
225
        "\x1BNHo" => 'ö', // 246 in CP1252
226
        "\x1B/7" => '÷', // 247 in CP1252
227
        "\x1BNy" => 'ø', // 248 in CP1252
228
        "\x1BNAu" => 'ù', // 249 in CP1252
229
        "\x1BNBu" => 'ú', // 250 in CP1252
230
        "\x1BNCu" => 'û', // 251 in CP1252
231
        "\x1BNHu" => 'ü', // 252 in CP1252
232
        "\x1B/=" => 'ý', // 253 in CP1252
233
        "\x1BN|" => 'þ', // 254 in CP1252
234
        "\x1BNHy" => 'ÿ', // 255 in CP1252
235
    ];
236

237
    /**
238
     * Decimal separator.
239
     */
240
    private static ?string $decimalSeparator = null;
241

242
    /**
243
     * Thousands separator.
244
     */
245
    private static ?string $thousandsSeparator = null;
246

247
    /**
248
     * Currency code.
249
     */
250
    private static ?string $currencyCode = null;
251

252
    /**
253
     * Is iconv extension available?
254
     */
255
    private static ?bool $isIconvEnabled = null;
256

257
    /**
258
     * iconv options.
259
     */
260
    private static string $iconvOptions = '//IGNORE//TRANSLIT';
261

262
    /**
     * Tell whether the iconv extension is present and actually usable.
     *
     * The probe runs only once: its outcome is cached in self::$isIconvEnabled.
     * As a side effect, self::$iconvOptions is reset to an empty string when
     * iconv works but rejects the default option suffix.
     */
    public static function getIsIconvEnabled(): bool
    {
        if (self::$isIconvEnabled !== null) {
            return self::$isIconvEnabled;
        }

        // Conditions are evaluated left to right and short-circuit, so iconv()
        // is never called when the function does not exist.
        $unusable = !function_exists('iconv')
            // Sometimes iconv is installed but not working and simply returns false.
            || !@iconv('UTF-8', 'UTF-16LE', 'x')
            // CUSTOM: IBM AIX iconv() does not work.
            || (defined('PHP_OS') && @stristr(PHP_OS, 'AIX') && defined('ICONV_IMPL') && (@strcasecmp(ICONV_IMPL, 'unknown') == 0) && defined('ICONV_VERSION') && (@strcasecmp(ICONV_VERSION, 'unknown') == 0));

        self::$isIconvEnabled = !$unusable;

        // Deactivate the default iconv options if they fail (as seen on IBM i).
        if (self::$isIconvEnabled && !@iconv('UTF-8', 'UTF-16LE' . self::$iconvOptions, 'x')) {
            self::$iconvOptions = '';
        }

        return self::$isIconvEnabled;
    }
292

293
    /**
     * Decode OpenXML `_xHHHH_` escape sequences into the characters they stand for.
     *
     * Background (Excel 2007 team): characters that cannot be represented in XML
     * are stored as _xHHHH_, where each H is a hexadecimal digit of the character's
     * value. Such sequences can show up in a cell value (<v>) or in a shared
     * string (<t>) element, e.g. _x0008_.
     *
     * An escape that is immediately followed by a second escape in the
     * low-surrogate range (DC00-DFFF) is handed to the callback as one match.
     *
     * @param string $textValue Value to unescape
     */
    public static function controlCharacterOOXML2PHP(string $textValue): string
    {
        // One escape, optionally followed by a low-surrogate escape.
        $escapePattern = '/_x[0-9A-F]{4}_(_xD[CDEF][0-9A-F]{2}_)?/';

        return Preg::replaceCallback($escapePattern, self::toOutChar(...), $textValue);
    }
310

311
    /**
     * Numeric value of a single hexadecimal digit.
     *
     * Digits '0'-'9' map to 0-9; every other character is treated as an
     * upper-case letter measured from 'A' (so 'A' => 10 ... 'F' => 15).
     */
    private static function toHexVal(string $char): int
    {
        $isDecimalDigit = $char >= '0' && $char <= '9';
        // Offset that has to be subtracted from the character code.
        $offset = $isDecimalDigit ? ord('0') : ord('A') - 10;

        return ord($char) - $offset;
    }
319

320
    /**
     * Preg callback: turn one matched `_xHHHH_` escape (optionally followed by a
     * low-surrogate escape) into UTF-8.
     *
     * - A single escape yields the code point itself, or U+FFFD when the value
     *   is an unpaired surrogate (D800-DFFF).
     * - A high surrogate (D800-DBFF) followed by a low surrogate is combined
     *   into the supplementary-plane character they encode.
     * - Any other first escape followed by a low surrogate yields the first
     *   character (U+FFFD if it is itself a surrogate) plus U+FFFD for the
     *   unpaired low surrogate.
     *
     * @param array<?string> $match Match data; only element 0 (the full match) is used
     */
    private static function toOutChar(array $match): string
    {
        /** @var string */
        $chars = $match[0];
        $replacement = mb_chr(0xFFFD, 'UTF-8');

        // Hex digits of the first escape sit at offsets 2..5 ("_x" prefix skipped).
        $h = ((self::toHexVal($chars[2]) << 12)
            | (self::toHexVal($chars[3]) << 8)
            | (self::toHexVal($chars[4]) << 4)
            | (self::toHexVal($chars[5])));
        $isSurrogate = $h >= 0xD800 && $h <= 0xDFFF;
        $isHighSurrogate = $isSurrogate && $h <= 0xDBFF;

        if (strlen($chars) === 7) { // no low surrogate
            return $isSurrogate ? $replacement : mb_chr($h, 'UTF-8');
        }

        if (!$isHighSurrogate) {
            // mb_chr() returns false for surrogate values, which would silently
            // drop the character; emit U+FFFD instead, as the single-escape path does.
            $first = $isSurrogate ? $replacement : mb_chr($h, 'UTF-8');

            return $first . $replacement;
        }

        // Hex digits of the second escape sit at offsets 9..12.
        $l = ((self::toHexVal($chars[9]) << 12)
            | (self::toHexVal($chars[10]) << 8)
            | (self::toHexVal($chars[11]) << 4)
            | (self::toHexVal($chars[12])));
        $result = 0x10000 + ($h - 0xD800) * 0x400 + ($l - 0xDC00);

        return mb_chr($result, 'UTF-8');
    }
347

348
    /**
     * Encode control characters as OpenXML `_xHHHH_` escape sequences.
     *
     * Background (Excel 2007 team): characters that cannot be represented in XML
     * are stored as _xHHHH_, where each H is a hexadecimal digit of the character's
     * value. Such sequences can show up in a cell value (<v>) or in a shared
     * string (<t>) element, e.g. _x0008_.
     *
     * Text that already looks like an escape gets its leading underscore encoded
     * as _x005F_ first.
     *
     * @param string $textValue Value to escape
     */
    public static function controlCharacterPHP2OOXML(string $textValue): string
    {
        // Step 1: protect literal "_xHHHH_" look-alikes.
        $protected = Preg::replace('/_(x[0-9A-F]{4}_)/', '_x005F_$1', $textValue);

        // Step 2: swap each control character for its escape.
        return str_replace(self::CONTROL_CHARACTERS_KEYS, self::CONTROL_CHARACTERS_VALUES, $protected);
    }
367

368
    /**
     * Try to sanitize UTF-8 by replacing invalid sequences with the Unicode
     * replacement character (U+FFFD).
     *
     * The encoded forms of U+FFFE and U+FFFF are replaced as well. The mbstring
     * substitute character is changed for the duration of the conversion and
     * put back afterwards.
     */
    public static function sanitizeUTF8(string $textValue): string
    {
        $replacementChar = "\xef\xbf\xbd";
        $cleaned = str_replace(["\xef\xbf\xbe", "\xef\xbf\xbf"], $replacementChar, $textValue);

        $previousSubstitute = mb_substitute_character(); // default is question mark
        mb_substitute_character(0xFFFD); // Unicode substitution character
        $cleaned = (string) mb_convert_encoding($cleaned, 'UTF-8', 'UTF-8');
        mb_substitute_character($previousSubstitute);

        return $cleaned;
    }
381

382
    /**
     * Check whether a string is already clean UTF-8, i.e. sanitizing it
     * would leave it unchanged.
     */
    public static function isUTF8(string $textValue): bool
    {
        $sanitized = self::sanitizeUTF8($textValue);

        return $sanitized === $textValue;
    }
389

390
    /**
     * Render a numeric value as a string for the output writers, forcing a point
     * as decimal separator for floats in case the locale is other than English.
     * Non-float values are simply cast to string.
     */
    public static function formatNumber(float|int|string|null $numericValue): string
    {
        $rendered = (string) $numericValue;

        return is_float($numericValue) ? str_replace(',', '.', $rendered) : $rendered;
    }
402

403
    /**
     * Convert a UTF-8 string into BIFF8 Unicode string data, uncompressed
     * (UTF-16LE) notation, no Asian phonetics.
     *
     * Without rich-text runs the header is a one-byte character count followed
     * by the flags byte 0x01. With runs the header is a 16-bit character count,
     * the flags byte 0x09 and a 16-bit run count; the runs themselves (string
     * length, font index - 16 bits each) are appended after the characters.
     *
     * See OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3.
     *
     * @param string $textValue UTF-8 encoded string
     * @param array<int, array{strlen: int, fontidx: int}> $arrcRuns Details of rich text runs in $value
     */
    public static function UTF8toBIFF8UnicodeShort(string $textValue, array $arrcRuns = []): string
    {
        $charCount = self::countCharacters($textValue, 'UTF-8');

        if (empty($arrcRuns)) {
            // Plain string: count + option flags, then the characters.
            return pack('CC', $charCount, 0x0001)
                . self::convertEncoding($textValue, 'UTF-16LE', 'UTF-8');
        }

        // Rich string: count + option flags + number of runs, then the characters.
        $header = pack('vC', $charCount, 0x09) . pack('v', count($arrcRuns));
        $data = $header . self::convertEncoding($textValue, 'UTF-16LE', 'UTF-8');
        foreach ($arrcRuns as $run) {
            $data .= pack('v', $run['strlen']) . pack('v', $run['fontidx']);
        }

        return $data;
    }
435

436
    /**
     * Convert a UTF-8 string into BIFF8 Unicode string data with a 16-bit
     * length field, uncompressed (UTF-16LE) notation, no rich text, no Asian
     * phonetics.
     *
     * See OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3.
     *
     * @param string $textValue UTF-8 encoded string
     */
    public static function UTF8toBIFF8UnicodeLong(string $textValue): string
    {
        $utf16 = self::convertEncoding($textValue, 'UTF-16LE', 'UTF-8');
        // N.B. - the length is the number of 16-bit units, hence strlen, not mb_strlen (issue #642)
        $unitCount = intdiv(strlen($utf16), 2);
        $header = pack('vC', $unitCount, 0x0001);

        return $header . $utf16;
    }
453

454
    /**
     * Convert a string from one encoding to another.
     *
     * iconv is tried first when it is available; if it is unavailable or
     * reports failure, mbstring performs the conversion instead.
     *
     * @param string $to Encoding to convert to, e.g. 'UTF-8'
     * @param string $from Encoding to convert from, e.g. 'UTF-16LE'
     */
    public static function convertEncoding(string $textValue, string $to, string $from): string
    {
        $viaIconv = static::getIsIconvEnabled()
            ? iconv($from, $to . self::$iconvOptions, $textValue)
            : false;

        if ($viaIconv === false) {
            return (string) mb_convert_encoding($textValue, $to, $from);
        }

        return $viaIconv;
    }
471

472
    /**
     * Count the characters (not bytes) in a string.
     *
     * @param string $encoding Encoding of $textValue
     *
     * @return int Number of characters
     */
    public static function countCharacters(string $textValue, string $encoding = 'UTF-8'): int
    {
        $characterCount = mb_strlen($textValue, $encoding);

        return $characterCount;
    }
483

484
    /**
     * Measure a string with mb_strwidth rather than mb_strlen.
     *
     * @param string $encoding Encoding of $textValue
     *
     * @return int Width as reported by mb_strwidth
     */
    public static function countCharactersDbcs(string $textValue, string $encoding = 'UTF-8'): int
    {
        $displayWidth = mb_strwidth($textValue, $encoding);

        return $displayWidth;
    }
495

496
    /**
     * Extract part of a UTF-8 encoded string, counting in characters.
     *
     * Note that $length defaults to 0, which produces an empty string; pass
     * null to read up to the end of the string.
     *
     * @param string $textValue UTF-8 encoded string
     * @param int $offset Start offset
     * @param ?int $length Maximum number of characters in substring
     */
    public static function substring(string $textValue, int $offset, ?int $length = 0): string
    {
        $slice = mb_substr($textValue, $offset, $length, 'UTF-8');

        return $slice;
    }
507

508
    /**
     * Upper-case a UTF-8 encoded string.
     *
     * @param string $textValue UTF-8 encoded string
     */
    public static function strToUpper(string $textValue): string
    {
        $upperCased = mb_convert_case($textValue, MB_CASE_UPPER, 'UTF-8');

        return $upperCased;
    }
517

518
    /**
     * Lower-case a UTF-8 encoded string.
     *
     * @param string $textValue UTF-8 encoded string
     */
    public static function strToLower(string $textValue): string
    {
        $lowerCased = mb_convert_case($textValue, MB_CASE_LOWER, 'UTF-8');

        return $lowerCased;
    }
527

528
    /**
     * Put a UTF-8 encoded string into title/proper case
     * (first character of each word upper case, all other characters lower case).
     *
     * @param string $textValue UTF-8 encoded string
     */
    public static function strToTitle(string $textValue): string
    {
        $titleCased = mb_convert_case($textValue, MB_CASE_TITLE, 'UTF-8');

        return $titleCased;
    }
538

539
    /**
     * Tell whether lower-casing would change the given text, which is how an
     * upper-case character is recognised here. Caseless characters (digits,
     * punctuation) therefore report false.
     */
    public static function mbIsUpper(string $character): bool
    {
        $lowered = mb_strtolower($character, 'UTF-8');

        return $character !== $lowered;
    }
543

544
    /**
     * Break a UTF-8 string up into its individual characters.
     *
     * @return string[]
     */
    public static function mbStrSplit(string $string): array
    {
        // Zero-width split points: every position that is neither directly
        // after the start (^) nor directly before the end ($).
        return Preg::split('/(?<!^)(?!$)/u', $string);
    }
557

558
    /**
     * Swap the case of every character: upper case becomes lower case and
     * everything else is upper-cased.
     *
     * @param string $textValue UTF-8 encoded string
     */
    public static function strCaseReverse(string $textValue): string
    {
        $swapped = array_map(
            static fn (string $char): string => self::mbIsUpper($char)
                ? mb_strtolower($char, 'UTF-8')
                : mb_strtoupper($char, 'UTF-8'),
            self::mbStrSplit($textValue)
        );

        return implode('', $swapped);
    }
577

578
    /**
     * Pick the alternative value (optionally trimmed), falling back to the
     * default when the alternative is falsy ('' or '0').
     */
    private static function useAlt(string $altValue, string $default, bool $trimAlt): string
    {
        $candidate = $altValue;
        if ($trimAlt) {
            $candidate = trim($altValue);
        }

        if (!$candidate) {
            return $default;
        }

        return $candidate;
    }
582

583
    /**
     * Look up a localeconv() entry, preferring $key and falling back to
     * $altKey (and finally $default) when the primary entry is empty or is
     * not valid UTF-8.
     */
    private static function getLocaleValue(string $key, string $altKey, string $default, bool $trimAlt = false): string
    {
        /** @var string[] */
        $conventions = localeconv();
        $primary = $conventions[$key];

        // win-1252 implements Euro as 0x80 plus other symbols, which is not valid UTF-8.
        // The plain preg_match is deliberate: this check is not suitable for Composer\Pcre\Preg.
        $isValidUtf8 = preg_match('//u', $primary) === 1;
        if ($isValidUtf8 && $primary) {
            return $primary;
        }

        return self::useAlt($conventions[$altKey], $default, $trimAlt);
    }
596

597
    /**
     * Return the decimal separator. When none has been set explicitly it is
     * obtained from the locale (default '.') and remembered.
     */
    public static function getDecimalSeparator(): string
    {
        return self::$decimalSeparator ??= self::getLocaleValue('decimal_point', 'mon_decimal_point', '.');
    }
609

610
    /**
     * Override the decimal separator; passing null clears the override so the
     * next read consults the locale again. Only used by
     * NumberFormat::toFormattedString() to format output by
     * \PhpOffice\PhpSpreadsheet\Writer\Html and \PhpOffice\PhpSpreadsheet\Writer\Pdf.
     *
     * @param ?string $separator Character for decimal separator
     */
    public static function setDecimalSeparator(?string $separator): void
    {
        self::$decimalSeparator = $separator;
    }
620

621
    /**
     * Return the thousands separator. When none has been set explicitly it is
     * obtained from the locale (default ',') and remembered.
     */
    public static function getThousandsSeparator(): string
    {
        return self::$thousandsSeparator ??= self::getLocaleValue('thousands_sep', 'mon_thousands_sep', ',');
    }
633

634
    /**
     * Override the thousands separator; passing null clears the override so the
     * next read consults the locale again. Only used by
     * NumberFormat::toFormattedString() to format output by
     * \PhpOffice\PhpSpreadsheet\Writer\Html and \PhpOffice\PhpSpreadsheet\Writer\Pdf.
     *
     * @param ?string $separator Character for thousands separator
     */
    public static function setThousandsSeparator(?string $separator): void
    {
        self::$thousandsSeparator = $separator;
    }
644

645
    /**
     * Return the currency code. When none has been set explicitly the symbol
     * is obtained from the locale (default '$') and remembered.
     *
     * @param bool $trimAlt Whether to trim the locale's alternative (international) symbol before using it
     */
    public static function getCurrencyCode(bool $trimAlt = false): string
    {
        return self::$currencyCode ??= self::getLocaleValue('currency_symbol', 'int_curr_symbol', '$', $trimAlt);
    }
657

658
    /**
     * Override the currency code; passing null clears the override so the next
     * read consults the locale again. Only used by
     * NumberFormat::toFormattedString() to format output by
     * \PhpOffice\PhpSpreadsheet\Writer\Html and \PhpOffice\PhpSpreadsheet\Writer\Pdf.
     *
     * @param ?string $currencyCode Character for currency code
     */
    public static function setCurrencyCode(?string $currencyCode): void
    {
        self::$currencyCode = $currencyCode;
    }
668

669
    /**
     * Convert SYLK encoded string to UTF-8.
     *
     * Every escape sequence listed in self::SYLK_CHARACTERS is replaced by the
     * character it stands for. The replacement is done in a single pass, so
     * text produced by one replacement is never interpreted again: an escape
     * character decoded from its own sequence cannot combine with the text
     * that follows it and be decoded a second time.
     *
     * @param string $textValue SYLK encoded string
     *
     * @return string UTF-8 encoded string
     */
    public static function SYLKtoUTF8(string $textValue): string
    {
        // If there is no escape character in the string there is nothing to do
        if (!str_contains($textValue, "\x1b")) {
            return $textValue;
        }

        // strtr() with an array scans the input once and never re-scans what
        // it has already replaced, unlike a sequence of str_replace() calls.
        return strtr($textValue, self::SYLK_CHARACTERS);
    }
689

690
    /**
     * Extract the leading numeric part of a string, or hand back the full string
     * when it does not start with a number (handles basic integer or float,
     * but not exponent or non decimal). A string that is numeric as a whole is
     * returned unchanged.
     *
     * @return float|string string or only the leading numeric part of the string
     */
    public static function testStringAsNumeric(string $textValue): float|string
    {
        if (is_numeric($textValue)) {
            return $textValue;
        }

        $leadingNumber = (float) $textValue;
        // Compare against a prefix as long as the number's own string form.
        $prefix = substr($textValue, 0, strlen((string) $leadingNumber));
        if (is_numeric($prefix)) {
            return $leadingNumber;
        }

        return $textValue;
    }
705

706
    /**
     * Byte length of a string, treating null as the empty string.
     */
    public static function strlenAllowNull(?string $string): int
    {
        return strlen((string) $string);
    }
710

711
    /**
     * Convert an arbitrary value to a string.
     *
     * Floats (unless $lessFloatPrecision is set) whose default string form
     * contains only sign, digits and a point are rebuilt from their integer
     * and fractional parts; floats in any other form (e.g. scientific
     * notation) keep the default cast. Null, scalars and Stringable objects
     * are cast directly. Anything else either throws or yields $default.
     *
     * @param bool $throw If true, throw when the value cannot be converted; otherwise return $default
     * @param string $default Value returned for unconvertible input when $throw is false
     * @param bool $convertBool If true, convert bool to locale-aware TRUE/FALSE rather than 1/null-string
     * @param bool $lessFloatPrecision If true, floats will be converted to a more human-friendly but less computationally accurate value
     */
    public static function convertToString(mixed $value, bool $throw = true, string $default = '', bool $convertBool = false, bool $lessFloatPrecision = false): string
    {
        if ($convertBool && is_bool($value)) {
            if ($value) {
                return Calculation::getTRUE();
            }

            return Calculation::getFALSE();
        }

        if (is_float($value) && !$lessFloatPrecision) {
            $plainCast = (string) $value;
            // look out for scientific notation
            if (Preg::isMatch('/[^-+0-9.]/', $plainCast)) {
                return $plainCast;
            }

            $sign = $value < 0 ? '-' : '';
            $magnitude = abs($value);
            $wholePart = floor($magnitude);
            $fractionText = (string) ($magnitude - $wholePart);
            // Keep only the digits after "0."; the text is left untouched when it has another shape.
            $fractionDigits = Preg::replace('/^0[.](\d+)$/', '$1', $fractionText);

            return ($fractionDigits !== $fractionText) ? "$sign$wholePart.$fractionDigits" : $plainCast;
        }

        $isCastable = $value === null || is_scalar($value) || $value instanceof Stringable;
        if ($isCastable) {
            return (string) $value;
        }

        if (!$throw) {
            return $default;
        }

        throw new SpreadsheetException('Unable to convert to string');
    }
746

747
    /**
     * Fetch a POST item as an HTML-entity-encoded string, for samples that
     * are run in a browser. Returns $default when the item is absent.
     * Never run as part of unit tests, which are command line.
     *
     * @codeCoverageIgnore
     */
    public static function convertPostToString(string $index, string $default = ''): string
    {
        if (!isset($_POST[$index])) {
            return $default;
        }

        $posted = self::convertToString($_POST[$index], false, $default);

        return htmlentities($posted);
    }
761

762
    /**
     * Increment a string in place and return the new value.
     *
     * Php introduced str_increment with Php8.3,
     * but didn't issue deprecation notices till 8.5;
     * the function is used whenever it exists, the ++ operator otherwise.
     *
     * @codeCoverageIgnore
     */
    public static function stringIncrement(string &$str): string
    {
        if (!function_exists('str_increment')) {
            ++$str; // @phpstan-ignore-line

            return $str; // @phpstan-ignore-line
        }

        $str = str_increment($str); // @phpstan-ignore-line

        return $str; // @phpstan-ignore-line
    }
778
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc