• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ducks-project / encoding-repair / 21293319579

23 Jan 2026 04:29PM UTC coverage: 94.057% (-2.9%) from 96.926%
21293319579

push

github

donaldinou
feat : normalize unittest

459 of 488 relevant lines covered (94.06%)

20.13 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.35
/CharsetProcessor.php
1
<?php
2

3
/**
4
 * Part of EncodingRepair package.
5
 *
6
 * (c) Adrien Loyant <donald_duck@team-df.org>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11

12
declare(strict_types=1);
13

14
namespace Ducks\Component\EncodingRepair;
15

16
use Ducks\Component\EncodingRepair\Detector\CachedDetector;
17
use Ducks\Component\EncodingRepair\Detector\DetectorChain;
18
use Ducks\Component\EncodingRepair\Detector\DetectorInterface;
19
use Ducks\Component\EncodingRepair\Detector\FileInfoDetector;
20
use Ducks\Component\EncodingRepair\Detector\MbStringDetector;
21
use Ducks\Component\EncodingRepair\Interpreter\ArrayInterpreter;
22
use Ducks\Component\EncodingRepair\Interpreter\InterpreterChain;
23
use Ducks\Component\EncodingRepair\Interpreter\ObjectInterpreter;
24
use Ducks\Component\EncodingRepair\Interpreter\PropertyMapperInterface;
25
use Ducks\Component\EncodingRepair\Interpreter\StringInterpreter;
26
use Ducks\Component\EncodingRepair\Interpreter\TypeInterpreterInterface;
27
use Ducks\Component\EncodingRepair\Transcoder\IconvTranscoder;
28
use Ducks\Component\EncodingRepair\Transcoder\MbStringTranscoder;
29
use Ducks\Component\EncodingRepair\Transcoder\TranscoderChain;
30
use Ducks\Component\EncodingRepair\Transcoder\TranscoderInterface;
31
use Ducks\Component\EncodingRepair\Transcoder\UConverterTranscoder;
32
use InvalidArgumentException;
33
use Normalizer;
34
use RuntimeException;
35

36
/**
37
 * Charset processing service.
38
 *
39
 * @final
40
 */
41
final class CharsetProcessor implements CharsetProcessorInterface
42
{
43
    /**
     * Encodings supplied as the default 'encodings' option by configureOptions().
     */
    private const DEFAULT_ENCODINGS = [
        self::ENCODING_UTF8,
        self::WINDOWS_1252,
        self::ENCODING_ISO,
        self::ENCODING_ASCII,
    ];

    /**
     * Default 'maxDepth' option used by repair() / repairValue().
     */
    private const MAX_REPAIR_DEPTH = 5;

    /**
     * Default nesting depth for safeJsonEncode() / safeJsonDecode().
     */
    private const JSON_DEFAULT_DEPTH = 512;

    /**
     * Default 'maxSamples' option used by detectBatch().
     */
    private const DEFAULT_MAX_SAMPLES = 1;

    /**
     * Chain of transcoders used by transcodeString().
     *
     * @var TranscoderChain
     */
    private TranscoderChain $transcoderChain;

    /**
     * Chain of detectors used by detect().
     *
     * @var DetectorChain
     */
    private DetectorChain $detectorChain;

    /**
     * Chain of type interpreters used by applyRecursive().
     *
     * @var InterpreterChain
     */
    private InterpreterChain $interpreterChain;

    /**
     * Whitelist checked by validateEncoding().
     *
     * Typed like the sibling properties; it is always assigned in the constructor.
     *
     * @var list<string>
     */
    private array $allowedEncodings;
73

74
    /**
     * Creates a processor with the default transcoders, detectors,
     * interpreters and allowed encodings.
     *
     * Each reset*() method assigns its own property, so the chains are built
     * exactly once and the default encoding whitelist lives in a single place
     * (resetEncodings()).
     */
    public function __construct()
    {
        $this->resetTranscoders();
        $this->resetDetectors();
        $this->resetInterpreters();
        $this->resetEncodings();
    }
93

94
    /**
     * Adds a transcoder to the transcoder chain with an optional priority.
     *
     * @inheritDoc
     */
    public function registerTranscoder(TranscoderInterface $transcoder, ?int $priority = null): self
    {
        $chain = $this->transcoderChain;
        $chain->register($transcoder, $priority);

        return $this;
    }
103

104
    /**
     * Removes a transcoder from the transcoder chain.
     *
     * @inheritDoc
     */
    public function unregisterTranscoder(TranscoderInterface $transcoder): self
    {
        $chain = $this->transcoderChain;
        $chain->unregister($transcoder);

        return $this;
    }
113

114
    /**
     * Registers every given transcoder, in order, without an explicit priority.
     *
     * @inheritDoc
     */
    public function queueTranscoders(TranscoderInterface ...$transcoders): self
    {
        foreach ($transcoders as $queued) {
            // Same call registerTranscoder() makes when no priority is given.
            $this->transcoderChain->register($queued, null);
        }

        return $this;
    }
125

126
    /**
     * Replaces the transcoder chain with a fresh one holding the built-in
     * transcoders (UConverter, iconv, mbstring), registered in that order.
     *
     * @inheritDoc
     */
    public function resetTranscoders(): self
    {
        $chain = new TranscoderChain();
        $this->transcoderChain = $chain;

        $chain->register(new UConverterTranscoder());
        $chain->register(new IconvTranscoder());
        $chain->register(new MbStringTranscoder());

        return $this;
    }
138

139
    /**
     * Adds a detector to the detector chain with an optional priority.
     *
     * @inheritDoc
     */
    public function registerDetector(DetectorInterface $detector, ?int $priority = null): self
    {
        $chain = $this->detectorChain;
        $chain->register($detector, $priority);

        return $this;
    }
148

149
    /**
     * Removes a detector from the detector chain.
     *
     * @inheritDoc
     */
    public function unregisterDetector(DetectorInterface $detector): self
    {
        $chain = $this->detectorChain;
        $chain->unregister($detector);

        return $this;
    }
158

159
    /**
     * Registers every given detector, in order, without an explicit priority.
     *
     * @inheritDoc
     */
    public function queueDetectors(DetectorInterface ...$detectors): self
    {
        foreach ($detectors as $queued) {
            // Same call registerDetector() makes when no priority is given.
            $this->detectorChain->register($queued, null);
        }

        return $this;
    }
170

171
    /**
     * Replaces the detector chain with a fresh one holding the built-in
     * detectors: an MbStringDetector wrapped in a CachedDetector, then a
     * FileInfoDetector.
     *
     * @inheritDoc
     */
    public function resetDetectors(): self
    {
        $chain = new DetectorChain();
        $this->detectorChain = $chain;

        $chain->register(new CachedDetector(new MbStringDetector()));
        $chain->register(new FileInfoDetector());

        return $this;
    }
184

185
    /**
     * Appends the given encodings to the whitelist, skipping names that are
     * already present (strict, case-sensitive comparison).
     *
     * @inheritDoc
     */
    public function addEncodings(string ...$encodings): self
    {
        foreach ($encodings as $candidate) {
            if (\in_array($candidate, $this->allowedEncodings, true)) {
                continue;
            }

            $this->allowedEncodings[] = $candidate;
        }

        return $this;
    }
198

199
    /**
     * Drops the given encodings from the whitelist and re-indexes it so it
     * stays a list.
     *
     * @inheritDoc
     */
    public function removeEncodings(string ...$encodings): self
    {
        $remaining = \array_diff($this->allowedEncodings, $encodings);
        $this->allowedEncodings = \array_values($remaining);

        return $this;
    }
210

211
    /**
     * Returns the current encoding whitelist.
     *
     * @inheritDoc
     */
    public function getEncodings(): array
    {
        $whitelist = $this->allowedEncodings;

        return $whitelist;
    }
218

219
    /**
     * Restores the encoding whitelist to its built-in default set.
     *
     * @inheritDoc
     */
    public function resetEncodings(): self
    {
        $defaults = [
            self::AUTO,
            self::ENCODING_UTF8,
            self::WINDOWS_1252,
            self::ENCODING_ISO,
            self::ENCODING_ASCII,
            self::ENCODING_UTF16,
            self::ENCODING_UTF32,
        ];

        $this->allowedEncodings = $defaults;

        return $this;
    }
236

237
    /**
     * Adds a type interpreter to the interpreter chain with an optional priority.
     *
     * @inheritDoc
     */
    public function registerInterpreter(TypeInterpreterInterface $interpreter, ?int $priority = null): self
    {
        $chain = $this->interpreterChain;
        $chain->register($interpreter, $priority);

        return $this;
    }
246

247
    /**
     * Removes a type interpreter from the interpreter chain.
     *
     * @inheritDoc
     */
    public function unregisterInterpreter(TypeInterpreterInterface $interpreter): self
    {
        $chain = $this->interpreterChain;
        $chain->unregister($interpreter);

        return $this;
    }
256

257
    /**
     * Registers a property mapper for a class on the chain's object interpreter.
     *
     * @inheritDoc
     *
     * @throws RuntimeException If the interpreter chain has no object interpreter
     */
    public function registerPropertyMapper(string $className, PropertyMapperInterface $mapper): self
    {
        $interpreter = $this->interpreterChain->getObjectInterpreter();

        if (null !== $interpreter) {
            $interpreter->registerMapper($className, $mapper);

            return $this;
        }

        throw new RuntimeException('ObjectInterpreter not registered in chain');
    }
272

273
    /**
     * Replaces the interpreter chain with a fresh one holding the built-in
     * interpreters: string (priority 100), array (50) and object (30).
     *
     * The array and object interpreters receive the chain itself.
     *
     * @inheritDoc
     */
    public function resetInterpreters(): self
    {
        $chain = new InterpreterChain();
        $this->interpreterChain = $chain;

        $chain->register(new StringInterpreter(), 100);
        $chain->register(new ArrayInterpreter($chain), 50);
        $chain->register(new ObjectInterpreter($chain), 30);

        return $this;
    }
285

286
    /**
     * Detects the encoding of a string.
     *
     * Strings that pass the UTF-8 check are reported as UTF-8 without
     * consulting the detector chain; otherwise the chain decides, with
     * self::ENCODING_ISO as the fallback when it returns null.
     *
     * @inheritDoc
     */
    public function detect(string $string, array $options = []): string
    {
        if (!$this->isValidUtf8($string)) {
            return $this->detectorChain->detect($string, $options) ?? self::ENCODING_ISO;
        }

        return self::ENCODING_UTF8;
    }
299

300
    /**
     * Detects one encoding for a whole collection of items.
     *
     * Collects up to 'maxSamples' non-empty strings from $items (default
     * self::DEFAULT_MAX_SAMPLES; non-integer or < 1 values fall back to the
     * default) and runs detect() on the first sample when the limit is 1, or
     * on the longest collected sample otherwise. Returns self::ENCODING_ISO
     * when no usable sample is found.
     *
     * @inheritDoc
     */
    public function detectBatch(iterable $items, array $options = []): string
    {
        /** @var mixed $limit */
        $limit = $options['maxSamples'] ?? self::DEFAULT_MAX_SAMPLES;
        if (!\is_int($limit) || $limit < 1) {
            $limit = self::DEFAULT_MAX_SAMPLES;
        }

        /** @var list<string> $collected */
        $collected = [];

        /** @var mixed $candidate */
        foreach ($items as $candidate) {
            if (!\is_string($candidate) || '' === $candidate) {
                continue;
            }

            $collected[] = $candidate;
            if (\count($collected) >= $limit) {
                break;
            }
        }

        // Nothing to inspect: fall back to the default encoding.
        if ([] === $collected) {
            return self::ENCODING_ISO;
        }

        // Default behavior: a single sample is enough.
        if (1 === $limit) {
            return $this->detect($collected[0], $options);
        }

        // Several samples: the longest one is used (first wins on ties).
        /** @var null|string $longest */
        $longest = null;
        foreach ($collected as $sample) {
            if (\strlen($sample) > \strlen($longest ?? '')) {
                $longest = $sample;
            }
        }

        return $this->detect($longest, $options);
    }
346

347
    /**
     * Converts data to the target encoding.
     *
     * Both encodings are validated against the whitelist, the options are
     * merged with the defaults, and convertValue() is applied to the data
     * through the interpreter chain.
     *
     * @inheritDoc
     */
    public function toCharset(
        $data,
        string $to = self::ENCODING_UTF8,
        string $from = self::ENCODING_ISO,
        array $options = []
    ) {
        $this->validateEncoding($to, 'target');
        $this->validateEncoding($from, 'source');

        $options = $this->configureOptions($options);

        // Per-value conversion handed to the interpreters.
        /**
         * @psalm-suppress MissingClosureParamType
         * @psalm-suppress MissingClosureReturnType
         */
        $convert = function ($value) use ($to, $from, $options) {
            return $this->convertValue($value, $to, $from, $options);
        };

        return $this->applyRecursive($data, $convert);
    }
370

371
    /**
     * Converts anything (string, array, object) to UTF-8.
     *
     * Shortcut for toCharset() with self::ENCODING_UTF8 as the target.
     *
     * @param mixed $data Data to convert
     * @param string $from Source encoding
     * @param array<string, mixed> $options Conversion options
     *                                      - 'normalize': bool (default: true)
     *                                      - 'translit': bool (default: true)
     *                                      - 'ignore': bool (default: true)
     *
     * @return mixed
     *
     * @throws InvalidArgumentException If encoding is invalid
     *
     * @psalm-api
     */
    public function toUtf8($data, string $from = self::WINDOWS_1252, array $options = [])
    {
        $target = self::ENCODING_UTF8;

        return $this->toCharset($data, $target, $from, $options);
    }
391

392
    /**
     * Converts anything (string, array, object) to Windows-1252.
     *
     * Despite the method name, the target handed to toCharset() is
     * self::WINDOWS_1252, not self::ENCODING_ISO.
     *
     * @param mixed $data Data to convert
     * @param string $from Source encoding
     * @param array<string, mixed> $options Conversion options
     *                                      - 'normalize': bool (default: true)
     *                                      - 'translit': bool (default: true)
     *                                      - 'ignore': bool (default: true)
     *
     * @return mixed
     *
     * @throws InvalidArgumentException If encoding is invalid
     *
     * @psalm-api
     */
    public function toIso($data, string $from = self::ENCODING_UTF8, array $options = [])
    {
        $target = self::WINDOWS_1252;

        return $this->toCharset($data, $target, $from, $options);
    }
412

413
    /**
     * Converts every item of an array to the target encoding.
     *
     * When $from is self::AUTO the source encoding is detected once for the
     * whole batch via detectBatch(); each item then goes through toCharset().
     * Array keys are preserved.
     *
     * @inheritDoc
     */
    public function toCharsetBatch(
        array $items,
        string $to = self::ENCODING_UTF8,
        string $from = self::ENCODING_ISO,
        array $options = []
    ): array {
        $this->validateEncoding($to, 'target');
        $this->validateEncoding($from, 'source');

        $source = $from;
        if (self::AUTO === $source) {
            $source = $this->detectBatch($items, $options);
        }

        $converted = [];

        /** @var mixed $item */
        foreach ($items as $key => $item) {
            /** @psalm-suppress MixedAssignment */
            $converted[$key] = $this->toCharset($item, $to, $source, $options);
        }

        return $converted;
    }
432

433
    /**
     * Batch converts array items from one encoding to UTF-8.
     *
     * Optimized for homogeneous arrays: detects encoding once on first non-empty string.
     * Use this instead of toUtf8() when processing large arrays with AUTO detection.
     *
     * @param array<mixed> $items Items to convert
     * @param string $from Source encoding (use AUTO for detection)
     * @param array<string, mixed> $options Conversion options
     *
     * @return array<mixed> Converted items
     *
     * @throws InvalidArgumentException If encoding is invalid
     *
     * @psalm-api
     */
    public function toUtf8Batch(
        array $items,
        string $from = self::WINDOWS_1252,
        array $options = []
    ): array {
        $target = self::ENCODING_UTF8;

        return $this->toCharsetBatch($items, $target, $from, $options);
    }
456

457
    /**
     * Batch converts array items from one encoding to Windows-1252.
     *
     * Despite the method name, the target handed to toCharsetBatch() is
     * self::WINDOWS_1252.
     *
     * Optimized for homogeneous arrays: detects encoding once on first non-empty string.
     * Use this instead of toIso() when processing large arrays with AUTO detection.
     *
     * @param array<mixed> $items Items to convert
     * @param string $from Source encoding (use AUTO for detection)
     * @param array<string, mixed> $options Conversion options
     *
     * @return array<mixed> Converted items
     *
     * @throws InvalidArgumentException If encoding is invalid
     *
     * @psalm-api
     */
    public function toIsoBatch(
        array $items,
        string $from = self::ENCODING_UTF8,
        array $options = []
    ): array {
        $target = self::WINDOWS_1252;

        return $this->toCharsetBatch($items, $target, $from, $options);
    }
480

481
    /**
     * Repairs data by applying repairValue() to it through the interpreter chain.
     *
     * Options are merged with the defaults; 'maxDepth' defaults to
     * self::MAX_REPAIR_DEPTH unless the caller provides it.
     *
     * @inheritDoc
     */
    public function repair(
        $data,
        string $to = self::ENCODING_UTF8,
        string $from = self::ENCODING_ISO,
        array $options = []
    ) {
        $depthDefault = ['maxDepth' => self::MAX_REPAIR_DEPTH];
        $options = $this->configureOptions($options, $depthDefault);

        /**
         * @psalm-suppress MissingClosureParamType
         * @psalm-suppress MissingClosureReturnType
         */
        $fix = function ($value) use ($to, $from, $options) {
            return $this->repairValue($value, $to, $from, $options);
        };

        return $this->applyRecursive($data, $fix);
    }
500

501
    /**
     * Repairs the data to UTF-8, then JSON-encodes it.
     *
     * JSON_THROW_ON_ERROR is always added to $flags, so encoding failures
     * surface as exceptions.
     *
     * @inheritDoc
     */
    public function safeJsonEncode(
        $data,
        int $flags = 0,
        int $depth = self::JSON_DEFAULT_DEPTH,
        string $from = self::WINDOWS_1252
    ): string {
        /** @var mixed $repaired */
        $repaired = $this->repair($data, self::ENCODING_UTF8, $from);

        // The throw flag is forced regardless of what the caller passed.
        $effectiveFlags = $flags | \JSON_THROW_ON_ERROR;

        return \json_encode($repaired, $effectiveFlags, $depth);
    }
516

517
    /**
     * Repairs a JSON string to UTF-8, decodes it, then converts the decoded
     * result from UTF-8 to the requested encoding.
     *
     * JSON_THROW_ON_ERROR is always added to $flags, so decoding failures
     * surface as exceptions.
     *
     * @inheritDoc
     */
    public function safeJsonDecode(
        string $json,
        ?bool $associative = null,
        int $depth = self::JSON_DEFAULT_DEPTH,
        int $flags = 0,
        string $to = self::ENCODING_UTF8,
        string $from = self::WINDOWS_1252
    ) {
        // json_decode() needs valid UTF-8 input.
        /** @var string $utf8Json */
        $utf8Json = $this->repair($json, self::ENCODING_UTF8, $from);

        // The throw flag is forced regardless of what the caller passed.
        $effectiveFlags = $flags | \JSON_THROW_ON_ERROR;

        /** @var mixed $decoded */
        $decoded = \json_decode($utf8Json, $associative, $depth, $effectiveFlags);

        return $this->toCharset($decoded, $to, self::ENCODING_UTF8);
    }
538

539
    /**
     * Runs a callback over the data through the interpreter chain.
     *
     * The chain is invoked with an empty options array.
     *
     * @param mixed $data Data to process
     * @param callable $callback Processing callback function
     *
     * @return mixed
     */
    private function applyRecursive($data, callable $callback)
    {
        $chain = $this->interpreterChain;

        return $chain->interpret($data, $callback, []);
    }
551

552
    /**
     * Converts a single value to the target encoding.
     *
     * Non-string values are returned untouched.
     *
     * @param mixed $value Value to convert
     * @param string $to Target encoding
     * @param string $from Source encoding
     * @param array<string, mixed> $options Conversion configuration
     *
     * @return mixed
     */
    private function convertValue($value, string $to, string $from, array $options)
    {
        if (!\is_string($value)) {
            return $value;
        }

        $targetIsUtf8 = self::ENCODING_UTF8 === $to;

        // Valid UTF-8 input headed for a non-UTF-8 target is converted from
        // UTF-8, ignoring $from. The mbstring check below is not trusted for
        // that case because it can succeed on bytes that are really UTF-8.
        if (!$targetIsUtf8 && $this->isValidUtf8($value)) {
            return $this->convertString($value, $to, self::ENCODING_UTF8, $options);
        }

        // Already valid in the target encoding: normalize only. Otherwise transcode.
        return \mb_check_encoding($value, $to)
            ? $this->normalize($value, $to, $options)
            : $this->convertString($value, $to, $from, $options);
    }
582

583
    /**
     * Low-level string conversion that never fails: falls back to the input
     * when transcoding returns null.
     *
     * @param string $data String to convert
     * @param string $to Target encoding
     * @param string $from Source encoding
     * @param array<string, mixed> $options Conversion options
     *
     * @return string Converted string or $data if conversion failed
     */
    private function convertString(string $data, string $to, string $from, array $options): string
    {
        $transcoded = $this->transcodeString($data, $to, $from, $options);

        return null === $transcoded ? $data : $transcoded;
    }
597

598
    /**
     * Low-level string transcoding through the transcoder chain.
     *
     * self::AUTO on either side is replaced by the encoding detected for
     * $data. A successful result whose target is UTF-8 is passed through
     * normalize().
     *
     * @param string $data String to transcode
     * @param string $to Target encoding
     * @param string $from Source encoding
     * @param array<string, mixed> $options Conversion options
     *
     * @return ?string Converted string or null if failed.
     */
    private function transcodeString(string $data, string $to, string $from, array $options): ?string
    {
        $toIsAuto = self::AUTO === $to;
        $fromIsAuto = self::AUTO === $from;

        // Detection runs at most once, even when both sides are AUTO.
        $detected = ($toIsAuto || $fromIsAuto) ? $this->detect($data, $options) : null;

        /** @var string $target */
        $target = $toIsAuto ? $detected : $to;
        /** @var string $source */
        $source = $fromIsAuto ? $detected : $from;

        $transcoded = $this->transcoderChain->transcode($data, $target, $source, $options);

        if (null === $transcoded || self::ENCODING_UTF8 !== $target) {
            return $transcoded;
        }

        return $this->normalize($transcoded, $target, $options);
    }
629

630
    /**
     * Repairs a double-encoded value.
     *
     * Peels encoding layers off the string (bounded by the 'maxDepth' option),
     * then converts the result to the target encoding, treating it as UTF-8
     * when it passes the UTF-8 check and as $from otherwise.
     *
     * @param mixed $value Value to repair
     * @param string $to Target encoding
     * @param string $from Glitch encoding
     * @param array<string, mixed> $options Configuration
     *
     * @return mixed
     */
    private function repairValue($value, string $to, string $from, array $options)
    {
        if (!\is_string($value)) {
            // @codeCoverageIgnoreStart
            return $value;
            // @codeCoverageIgnoreEnd
        }

        /** @var mixed $depth */
        $depth = $options['maxDepth'] ?? self::MAX_REPAIR_DEPTH;
        // Anything that is not an integer falls back to the default depth.
        $depth = \is_int($depth) ? $depth : self::MAX_REPAIR_DEPTH;

        $peeled = $this->peelEncodingLayers($value, $from, $depth);
        $source = $this->isValidUtf8($peeled) ? self::ENCODING_UTF8 : $from;

        return $this->toCharset($peeled, $to, $source, $options);
    }
659

660
    /**
     * Attempts to remove multiple encoding layers.
     *
     * Repeatedly transcodes the string from UTF-8 back to $from while it
     * still passes the UTF-8 check, keeping a result only when it is shorter
     * and still valid UTF-8.
     *
     * @param string $value String to repair
     * @param string $from Encoding to reverse
     * @param int $maxDepth Maximum iterations
     *
     * @return string Repaired string
     */
    private function peelEncodingLayers(string $value, string $from, int $maxDepth): string
    {
        // Strict settings: no normalization, transliteration or ignoring.
        $reverseOptions = ['normalize' => false, 'translit' => false, 'ignore' => false];
        $current = $value;

        for ($pass = 0; $pass < $maxDepth && $this->isValidUtf8($current); ++$pass) {
            // Reverse conversion attempt (UTF-8 -> $from).
            $candidate = $this->transcodeString($current, $from, self::ENCODING_UTF8, $reverseOptions);

            if (null === $candidate) {
                break;
            }

            // An unchanged or non-shrinking result would loop forever.
            if ($candidate === $current || \strlen($candidate) >= \strlen($current)) {
                break;
            }

            if (!$this->isValidUtf8($candidate)) {
                break;
            }

            $current = $candidate;
        }

        return $current;
    }
692

693
    /**
     * Normalizes a UTF-8 string if needed.
     *
     * Runs only when the target is UTF-8, the 'normalize' option is not
     * false, and the intl Normalizer class exists. Returns the input
     * unchanged otherwise, or when normalization fails.
     *
     * @param string $value String to normalize
     * @param string $to Target encoding
     * @param array<string, mixed> $options Configuration
     *
     * @return string Normalized or original string
     *
     * @codeCoverageIgnore
     */
    private function normalize(string $value, string $to, array $options): string
    {
        $wanted = self::ENCODING_UTF8 === $to && false !== ($options['normalize'] ?? true);

        if (!$wanted || !\class_exists(Normalizer::class)) {
            return $value;
        }

        $result = Normalizer::normalize($value);

        return false === $result ? $value : $result;
    }
719

720
    /**
     * Checks if string is valid UTF-8.
     *
     * Please note that it relies on mb_check_encoding internally, so it can
     * also return true for a string that was not really meant as UTF-8.
     *
     * @param string $string String to check
     *
     * @return bool True if valid UTF-8
     */
    private function isValidUtf8(string $string): bool
    {
        $valid = \mb_check_encoding($string, self::ENCODING_UTF8);

        return $valid;
    }
734

735
    /**
     * Validates an encoding name against the whitelist.
     *
     * The name is accepted when either it or its upper-cased form is in the
     * whitelist.
     *
     * @param string $encoding Encoding to validate
     * @param string $type Type for error message (e.g., 'source', 'target')
     *
     * @throws InvalidArgumentException If encoding is not allowed
     */
    private function validateEncoding(string $encoding, string $type): void
    {
        $allowed = $this->allowedEncodings;

        if (
            \in_array($encoding, $allowed, true)
            || \in_array(\strtoupper($encoding), $allowed, true)
        ) {
            return;
        }

        $message = \sprintf(
            'Invalid %s encoding: "%s". Allowed: %s',
            $type,
            $encoding,
            \implode(', ', $allowed)
        );

        throw new InvalidArgumentException($message);
    }
761

762
    /**
     * Builds conversion configuration with defaults.
     *
     * Layers are merged with array_replace() in this order, later layers
     * winning: built-in defaults, then each $replacements array, then the
     * user-provided $options. The $replacements therefore act as extra
     * defaults that user options can still override.
     *
     * @param array<string, mixed> $options User-provided options (highest priority)
     * @param array<string, mixed> ...$replacements Additional default layers
     *
     * @return array<string, mixed> Merged configuration
     *
     * @example
     * // Basic usage
     * $config = $this->configureOptions(['normalize' => false]);
     *
     * // With additional defaults
     * $config = $this->configureOptions(
     *     ['normalize' => false],
     *     ['maxDepth' => 10]
     * );
     */
    private function configureOptions(array $options, array ...$replacements): array
    {
        $defaults = [
            'normalize' => true,
            'translit' => true,
            'ignore' => true,
            'encodings' => self::DEFAULT_ENCODINGS,
        ];

        // User options go last so they win over every other layer.
        $layers = $replacements;
        $layers[] = $options;

        return \array_replace($defaults, ...$layers);
    }
791
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc