• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ducks-project / encoding-repair / 21291993490

23 Jan 2026 03:45PM UTC coverage: 97.336% (-2.7%) from 100.0%
21291993490

push

github

donaldinou
fix : tests

1 of 1 new or added line in 1 file covered. (100.0%)

11 existing lines in 2 files now uncovered.

475 of 488 relevant lines covered (97.34%)

29.3 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.15
/CharsetProcessor.php
1
<?php
2

3
/**
4
 * Part of EncodingRepair package.
5
 *
6
 * (c) Adrien Loyant <donald_duck@team-df.org>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11

12
declare(strict_types=1);
13

14
namespace Ducks\Component\EncodingRepair;
15

16
use Ducks\Component\EncodingRepair\Detector\CachedDetector;
17
use Ducks\Component\EncodingRepair\Detector\DetectorChain;
18
use Ducks\Component\EncodingRepair\Detector\DetectorInterface;
19
use Ducks\Component\EncodingRepair\Detector\FileInfoDetector;
20
use Ducks\Component\EncodingRepair\Detector\MbStringDetector;
21
use Ducks\Component\EncodingRepair\Interpreter\ArrayInterpreter;
22
use Ducks\Component\EncodingRepair\Interpreter\InterpreterChain;
23
use Ducks\Component\EncodingRepair\Interpreter\ObjectInterpreter;
24
use Ducks\Component\EncodingRepair\Interpreter\PropertyMapperInterface;
25
use Ducks\Component\EncodingRepair\Interpreter\StringInterpreter;
26
use Ducks\Component\EncodingRepair\Interpreter\TypeInterpreterInterface;
27
use Ducks\Component\EncodingRepair\Transcoder\IconvTranscoder;
28
use Ducks\Component\EncodingRepair\Transcoder\MbStringTranscoder;
29
use Ducks\Component\EncodingRepair\Transcoder\TranscoderChain;
30
use Ducks\Component\EncodingRepair\Transcoder\TranscoderInterface;
31
use Ducks\Component\EncodingRepair\Transcoder\UConverterTranscoder;
32
use InvalidArgumentException;
33
use JsonException;
34
use Normalizer;
35
use RuntimeException;
36

37
/**
38
 * Charset processing service.
39
 *
40
 * @final
41
 */
42
final class CharsetProcessor implements CharsetProcessorInterface
43
{
44
    private const DEFAULT_ENCODINGS = [
45
        self::ENCODING_UTF8,
46
        self::WINDOWS_1252,
47
        self::ENCODING_ISO,
48
        self::ENCODING_ASCII,
49
    ];
50

51
    private const MAX_REPAIR_DEPTH = 5;
52
    private const JSON_DEFAULT_DEPTH = 512;
53
    private const DEFAULT_MAX_SAMPLES = 1;
54

55
    /**
56
     * @var TranscoderChain
57
     */
58
    private TranscoderChain $transcoderChain;
59

60
    /**
61
     * @var DetectorChain
62
     */
63
    private DetectorChain $detectorChain;
64

65
    /**
66
     * @var InterpreterChain
67
     */
68
    private InterpreterChain $interpreterChain;
69

70
    /**
71
     * @var list<string>
72
     */
73
    private $allowedEncodings;
74

75
    /**
     * Builds a processor pre-loaded with the default encodings, transcoders,
     * detectors and interpreters.
     *
     * Every default is installed through the matching reset*() method, so the
     * constructor and the public reset API share a single source of truth.
     */
    public function __construct()
    {
        // Each reset*() call assigns its backing property from scratch, so there
        // is no need to create throw-away chain instances beforehand.
        $this->resetEncodings();
        $this->resetTranscoders();
        $this->resetDetectors();
        $this->resetInterpreters();
    }
94

95
    /**
96
     * @inheritDoc
97
     */
98
    public function registerTranscoder(TranscoderInterface $transcoder, ?int $priority = null): self
6✔
99
    {
100
        $this->transcoderChain->register($transcoder, $priority);
6✔
101

102
        return $this;
6✔
103
    }
104

105
    /**
106
     * @inheritDoc
107
     */
108
    public function unregisterTranscoder(TranscoderInterface $transcoder): self
3✔
109
    {
110
        $this->transcoderChain->unregister($transcoder);
3✔
111

112
        return $this;
3✔
113
    }
114

115
    /**
116
     * @inheritDoc
117
     */
118
    public function queueTranscoders(TranscoderInterface ...$transcoders): self
1✔
119
    {
120
        foreach ($transcoders as $transcoder) {
1✔
121
            $this->registerTranscoder($transcoder);
1✔
122
        }
123

124
        return $this;
1✔
125
    }
126

127
    /**
128
     * @inheritDoc
129
     */
130
    public function resetTranscoders(): self
111✔
131
    {
132
        $this->transcoderChain = new TranscoderChain();
111✔
133
        $this->transcoderChain->register(new UConverterTranscoder());
111✔
134
        $this->transcoderChain->register(new IconvTranscoder());
111✔
135
        $this->transcoderChain->register(new MbStringTranscoder());
111✔
136

137
        return $this;
111✔
138
    }
139

140
    /**
141
     * @inheritDoc
142
     */
143
    public function registerDetector(DetectorInterface $detector, ?int $priority = null): self
5✔
144
    {
145
        $this->detectorChain->register($detector, $priority);
5✔
146

147
        return $this;
5✔
148
    }
149

150
    /**
151
     * @inheritDoc
152
     */
153
    public function unregisterDetector(DetectorInterface $detector): self
1✔
154
    {
155
        $this->detectorChain->unregister($detector);
1✔
156

157
        return $this;
1✔
158
    }
159

160
    /**
161
     * @inheritDoc
162
     */
163
    public function queueDetectors(DetectorInterface ...$detectors): self
1✔
164
    {
165
        foreach ($detectors as $detector) {
1✔
166
            $this->registerDetector($detector);
1✔
167
        }
168

169
        return $this;
1✔
170
    }
171

172
    /**
173
     * @inheritDoc
174
     */
175
    public function resetDetectors(): self
111✔
176
    {
177
        $this->detectorChain = new DetectorChain();
111✔
178
        $mbDetector = new MbStringDetector();
111✔
179
        $cachedDetector = new CachedDetector($mbDetector);
111✔
180
        $this->detectorChain->register($cachedDetector);
111✔
181
        $this->detectorChain->register(new FileInfoDetector());
111✔
182

183
        return $this;
111✔
184
    }
185

186
    /**
187
     * @inheritDoc
188
     */
189
    public function addEncodings(string ...$encodings): self
6✔
190
    {
191
        foreach ($encodings as $encoding) {
6✔
192
            if (!\in_array($encoding, $this->allowedEncodings, true)) {
6✔
193
                $this->allowedEncodings[] = $encoding;
6✔
194
            }
195
        }
196

197
        return $this;
6✔
198
    }
199

200
    /**
201
     * @inheritDoc
202
     */
203
    public function removeEncodings(string ...$encodings): self
2✔
204
    {
205
        $this->allowedEncodings = \array_values(
2✔
206
            \array_diff($this->allowedEncodings, $encodings)
2✔
207
        );
2✔
208

209
        return $this;
2✔
210
    }
211

212
    /**
213
     * @inheritDoc
214
     */
215
    public function getEncodings(): array
5✔
216
    {
217
        return $this->allowedEncodings;
5✔
218
    }
219

220
    /**
221
     * @inheritDoc
222
     */
223
    public function resetEncodings(): self
1✔
224
    {
225
        $this->allowedEncodings = [
1✔
226
            self::AUTO,
1✔
227
            self::ENCODING_UTF8,
1✔
228
            self::WINDOWS_1252,
1✔
229
            self::ENCODING_ISO,
1✔
230
            self::ENCODING_ASCII,
1✔
231
            self::ENCODING_UTF16,
1✔
232
            self::ENCODING_UTF32,
1✔
233
        ];
1✔
234

235
        return $this;
1✔
236
    }
237

238
    /**
239
     * @inheritDoc
240
     */
UNCOV
241
    public function registerInterpreter(TypeInterpreterInterface $interpreter, ?int $priority = null): self
×
242
    {
UNCOV
243
        $this->interpreterChain->register($interpreter, $priority);
×
244

UNCOV
245
        return $this;
×
246
    }
247

248
    /**
249
     * @inheritDoc
250
     */
UNCOV
251
    public function unregisterInterpreter(TypeInterpreterInterface $interpreter): self
×
252
    {
UNCOV
253
        $this->interpreterChain->unregister($interpreter);
×
254

UNCOV
255
        return $this;
×
256
    }
257

258
    /**
259
     * @inheritDoc
260
     */
261
    public function registerPropertyMapper(string $className, PropertyMapperInterface $mapper): self
2✔
262
    {
263
        $objectInterpreter = $this->interpreterChain->getObjectInterpreter();
2✔
264

265
        if (null === $objectInterpreter) {
2✔
266
            throw new RuntimeException('ObjectInterpreter not registered in chain');
1✔
267
        }
268

269
        $objectInterpreter->registerMapper($className, $mapper);
1✔
270

271
        return $this;
1✔
272
    }
273

274
    /**
275
     * @inheritDoc
276
     */
277
    public function resetInterpreters(): self
111✔
278
    {
279
        $this->interpreterChain = new InterpreterChain();
111✔
280
        $this->interpreterChain->register(new StringInterpreter(), 100);
111✔
281
        $this->interpreterChain->register(new ArrayInterpreter($this->interpreterChain), 50);
111✔
282
        $this->interpreterChain->register(new ObjectInterpreter($this->interpreterChain), 30);
111✔
283

284
        return $this;
111✔
285
    }
286

287
    /**
288
     * @inheritDoc
289
     */
290
    public function detect(string $string, array $options = []): string
17✔
291
    {
292
        if ($this->isValidUtf8($string)) {
17✔
293
            return self::ENCODING_UTF8;
14✔
294
        }
295

296
        $detected = $this->detectorChain->detect($string, $options);
3✔
297

298
        return $detected ?? self::ENCODING_ISO;
3✔
299
    }
300

301
    /**
302
     * @inheritDoc
303
     */
304
    public function detectBatch(iterable $items, array $options = []): string
12✔
305
    {
306
        /** @var mixed $maxSamples */
307
        $maxSamples = $options['maxSamples'] ?? self::DEFAULT_MAX_SAMPLES;
12✔
308
        if (!\is_int($maxSamples) || 1 > $maxSamples) {
12✔
309
            $maxSamples = self::DEFAULT_MAX_SAMPLES;
1✔
310
        }
311

312
        /** @var list<string> $samples */
313
        $samples = [];
12✔
314

315
        /** @var mixed $item */
316
        foreach ($items as $item) {
12✔
317
            if (\is_string($item) && '' !== $item) {
12✔
318
                $samples[] = $item;
8✔
319
                if (\count($samples) >= $maxSamples) {
8✔
320
                    break;
6✔
321
                }
322
            }
323
        }
324

325
        // Fast return.
326
        if (empty($samples)) {
12✔
327
            return self::ENCODING_ISO;
4✔
328
        }
329

330
        // Fast path: single sample (default behavior)
331
        if (1 === $maxSamples) {
8✔
332
            return $this->detect($samples[0], $options);
6✔
333
        }
334

335
        // Detect on longest sample (more reliable for multiple samples)
336
        $longest = \array_reduce(
2✔
337
            $samples,
2✔
338
            /**
339
             * @param null|string $carry
340
             * @param string $item
341
             */
342
            static fn ($carry, $item) => \strlen($item) > \strlen($carry ?? '') ? $item : $carry
2✔
343
        );
2✔
344

345
        return $this->detect($longest, $options);
2✔
346
    }
347

348
    /**
349
     * @inheritDoc
350
     */
351
    public function toCharset(
79✔
352
        $data,
353
        string $to = self::ENCODING_UTF8,
354
        string $from = self::ENCODING_ISO,
355
        array $options = []
356
    ) {
357
        $this->validateEncoding($to, 'target');
79✔
358
        $this->validateEncoding($from, 'source');
78✔
359

360
        $options = $this->configureOptions($options);
77✔
361

362
        // We define the callback logic for a single string
363
        /**
364
         * @psalm-suppress MissingClosureParamType
365
         * @psalm-suppress MissingClosureReturnType
366
         */
367
        $callback = fn ($value) => $this->convertValue($value, $to, $from, $options);
77✔
368

369
        return $this->applyRecursive($data, $callback);
77✔
370
    }
371

372
    /**
373
     * Converts anything (string, array, object) to UTF-8.
374
     *
375
     * @param mixed $data Data to convert
376
     * @param string $from Source encoding
377
     * @param array<string, mixed> $options Conversion options
378
     *                                      - 'normalize': bool (default: true)
379
     *                                      - 'translit': bool (default: true)
380
     *                                      - 'ignore': bool (default: true)
381
     *
382
     * @return mixed
383
     *
384
     * @throws InvalidArgumentException If encoding is invalid
385
     *
386
     * @psalm-api
387
     */
388
    public function toUtf8($data, string $from = self::WINDOWS_1252, array $options = [])
13✔
389
    {
390
        return $this->toCharset($data, self::ENCODING_UTF8, $from, $options);
13✔
391
    }
392

393
    /**
394
     * Converts anything to ISO-8859-1 (Windows-1252).
395
     *
396
     * @param mixed $data Data to convert
397
     * @param string $from Source encoding
398
     * @param array<string, mixed> $options Conversion options
399
     *                                      - 'normalize': bool (default: true)
400
     *                                      - 'translit': bool (default: true)
401
     *                                      - 'ignore': bool (default: true)
402
     *
403
     * @return mixed
404
     *
405
     * @throws InvalidArgumentException If encoding is invalid
406
     *
407
     * @psalm-api
408
     */
409
    public function toIso($data, string $from = self::ENCODING_UTF8, array $options = [])
1✔
410
    {
411
        return $this->toCharset($data, self::WINDOWS_1252, $from, $options);
1✔
412
    }
413

414
    /**
415
     * @inheritDoc
416
     */
417
    public function toCharsetBatch(
6✔
418
        array $items,
419
        string $to = self::ENCODING_UTF8,
420
        string $from = self::ENCODING_ISO,
421
        array $options = []
422
    ): array {
423
        $this->validateEncoding($to, 'target');
6✔
424
        $this->validateEncoding($from, 'source');
6✔
425

426
        if (self::AUTO === $from) {
6✔
427
            $from = $this->detectBatch($items, $options);
1✔
428
        }
429

430
        /** @psalm-suppress MissingClosureReturnType */
431
        return \array_map(fn ($item) => $this->toCharset($item, $to, $from, $options), $items);
6✔
432
    }
433

434
    /**
435
     * Batch convert array items from one encoding to utf8.
436
     *
437
     * Optimized for homogeneous arrays: detects encoding once on first non-empty string.
438
     * Use this instead of toUtf8() when processing large arrays with AUTO detection.
439
     *
440
     * @param array<mixed> $items Items to convert
441
     * @param string $from Source encoding (use AUTO for detection)
442
     * @param array<string, mixed> $options Conversion options
443
     *
444
     * @return array<mixed> Converted items
445
     *
446
     * @throws InvalidArgumentException If encoding is invalid
447
     *
448
     * @psalm-api
449
     */
450
    public function toUtf8Batch(
1✔
451
        array $items,
452
        string $from = self::WINDOWS_1252,
453
        array $options = []
454
    ): array {
455
        return $this->toCharsetBatch($items, self::ENCODING_UTF8, $from, $options);
1✔
456
    }
457

458
    /**
459
     * Batch convert array items from one encoding to iso.
460
     *
461
     * Optimized for homogeneous arrays: detects encoding once on first non-empty string.
462
     * Use this instead of toIso() when processing large arrays with AUTO detection.
463
     *
464
     * @param array<mixed> $items Items to convert
465
     * @param string $from Source encoding (use AUTO for detection)
466
     * @param array<string, mixed> $options Conversion options
467
     *
468
     * @return array<mixed> Converted items
469
     *
470
     * @throws InvalidArgumentException If encoding is invalid
471
     *
472
     * @psalm-api
473
     */
474
    public function toIsoBatch(
1✔
475
        array $items,
476
        string $from = self::ENCODING_UTF8,
477
        array $options = []
478
    ): array {
479
        return $this->toCharsetBatch($items, self::WINDOWS_1252, $from, $options);
1✔
480
    }
481

482
    /**
     * @inheritDoc
     *
     * Both encodings are checked against the whitelist before any work is done,
     * mirroring toCharset() and toCharsetBatch().
     *
     * @throws InvalidArgumentException If $to or $from is not an allowed encoding
     */
    public function repair(
        $data,
        string $to = self::ENCODING_UTF8,
        string $from = self::ENCODING_ISO,
        array $options = []
    ) {
        // Fail fast: repairValue() hands $from to the transcoder chain (through
        // peelEncodingLayers()) before toCharset() gets a chance to validate it,
        // and non-string payloads would otherwise never be validated at all.
        $this->validateEncoding($to, 'target');
        $this->validateEncoding($from, 'source');

        // 'maxDepth' default is layered under the caller's options, which win.
        $options = $this->configureOptions($options, ['maxDepth' => self::MAX_REPAIR_DEPTH]);

        /**
         * @psalm-suppress MissingClosureParamType
         * @psalm-suppress MissingClosureReturnType
         */
        $callback = fn ($value) => $this->repairValue($value, $to, $from, $options);

        return $this->applyRecursive($data, $callback);
    }
501

502
    /**
503
     * @inheritDoc
504
     */
505
    public function safeJsonEncode(
6✔
506
        $data,
507
        int $flags = 0,
508
        int $depth = self::JSON_DEFAULT_DEPTH,
509
        string $from = self::WINDOWS_1252
510
    ): string {
511
        /** @var mixed $data */
512
        $data = $this->repair($data, self::ENCODING_UTF8, $from);
6✔
513

514
        // Force JSON_THROW_ON_ERROR flag
515
        return \json_encode($data, $flags | \JSON_THROW_ON_ERROR, $depth);
6✔
516
    }
517

518
    /**
519
     * @inheritDoc
520
     */
521
    public function safeJsonDecode(
9✔
522
        string $json,
523
        ?bool $associative = null,
524
        int $depth = self::JSON_DEFAULT_DEPTH,
525
        int $flags = 0,
526
        string $to = self::ENCODING_UTF8,
527
        string $from = self::WINDOWS_1252
528
    ) {
529
        // Repair string to a valid UTF-8 for decoding
530
        /** @var string $data */
531
        $data = $this->repair($json, self::ENCODING_UTF8, $from);
9✔
532

533
        // Force JSON_THROW_ON_ERROR flag
534
        /** @var mixed $result */
535
        $result = \json_decode($data, $associative, $depth, $flags | \JSON_THROW_ON_ERROR);
9✔
536

537
        return $this->toCharset($result, $to, self::ENCODING_UTF8);
7✔
538
    }
539

540
    /**
541
     * Applies a callback recursively using type interpreters.
542
     *
543
     * @param mixed $data Data to process
544
     * @param callable $callback Processing callback function
545
     *
546
     * @return mixed
547
     */
548
    private function applyRecursive($data, callable $callback)
82✔
549
    {
550
        return $this->interpreterChain->interpret($data, $callback, []);
82✔
551
    }
552

553
    /**
554
     * Converts a single value to target encoding.
555
     *
556
     * @param mixed $value Value to convert
557
     * @param string $to Target encoding
558
     * @param string $from Source encoding
559
     * @param array<string, mixed> $options Conversion configuration
560
     *
561
     * @return mixed
562
     */
563
    private function convertValue($value, string $to, string $from, array $options)
74✔
564
    {
565
        if (!\is_string($value)) {
74✔
UNCOV
566
            return $value;
×
567
        }
568

569
        // Special handling when converting FROM UTF-8
570
        // Do not rely on mb_check_encoding() against the target when the value is
571
        // valid UTF-8: it can report a match even though a conversion is still needed.
572
        if (self::ENCODING_UTF8 !== $to && $this->isValidUtf8($value)) {
74✔
573
            return $this->convertString($value, $to, self::ENCODING_UTF8, $options);
7✔
574
        }
575

576
        // Check if already in target encoding
577
        if (\mb_check_encoding($value, $to)) {
67✔
578
            return $this->normalize($value, $to, $options);
55✔
579
        }
580

581
        return $this->convertString($value, $to, $from, $options);
13✔
582
    }
583

584
    /**
585
     * Low-level string conversion logic.
586
     *
587
     * @param string $data String to convert
588
     * @param string $to Target encoding
589
     * @param string $from Source encoding
590
     * @param array<string, mixed> $options Conversion options
591
     *
592
     * @return string Converted string or $data if conversion failed
593
     */
594
    private function convertString(string $data, string $to, string $from, array $options): string
20✔
595
    {
596
        return $this->transcodeString($data, $to, $from, $options) ?? $data;
20✔
597
    }
598

599
    /**
600
     * Low-level string transcode logic with fallback strategies.
601
     *
602
     * @param string $data String to transcode
603
     * @param string $to Target encoding
604
     * @param string $from Source encoding
605
     * @param array<string, mixed> $options Conversion options
606
     *
607
     * @return ?string Converted string or null if failed.
608
     */
609
    private function transcodeString(string $data, string $to, string $from, array $options): ?string
43✔
610
    {
611
        // Optimize: detect once if both are AUTO
612
        $detectedEncoding = null;
43✔
613
        if (self::AUTO === $to || self::AUTO === $from) {
43✔
UNCOV
614
            $detectedEncoding = $this->detect($data, $options);
×
615
        }
616

617
        /** @var string $targetEncoding */
618
        $targetEncoding = self::AUTO === $to ? $detectedEncoding : $to;
43✔
619
        /** @var string $sourceEncoding */
620
        $sourceEncoding = self::AUTO === $from ? $detectedEncoding : $from;
43✔
621

622
        $result = $this->transcoderChain->transcode($data, $targetEncoding, $sourceEncoding, $options);
43✔
623

624
        if (null !== $result && self::ENCODING_UTF8 === $targetEncoding) {
43✔
625
            return $this->normalize($result, $targetEncoding, $options);
14✔
626
        }
627

628
        return $result;
29✔
629
    }
630

631
    /**
632
     * Repairs a double-encoded value.
633
     *
634
     * @param mixed $value Value to repair
635
     * @param string $to Target encoding
636
     * @param string $from Glitch encoding
637
     * @param array<string, mixed> $options Configuration
638
     *
639
     * @return mixed
640
     */
641
    private function repairValue($value, string $to, string $from, array $options)
25✔
642
    {
643
        if (!\is_string($value)) {
25✔
644
            // @codeCoverageIgnoreStart
645
            return $value;
646
            // @codeCoverageIgnoreEnd
647
        }
648

649
        /** @var mixed $maxDepth */
650
        $maxDepth = $options['maxDepth'] ?? self::MAX_REPAIR_DEPTH;
25✔
651
        if (!\is_int($maxDepth)) {
25✔
652
            $maxDepth = self::MAX_REPAIR_DEPTH;
2✔
653
        }
654

655
        $fixed = $this->peelEncodingLayers($value, $from, $maxDepth);
25✔
656
        $detectedEncoding = $this->isValidUtf8($fixed) ? self::ENCODING_UTF8 : $from;
25✔
657

658
        return $this->toCharset($fixed, $to, $detectedEncoding, $options);
25✔
659
    }
660

661
    /**
662
     * Attempts to remove multiple encoding layers.
663
     *
664
     * @param string $value String to repair
665
     * @param string $from Encoding to reverse
666
     * @param int $maxDepth Maximum iterations
667
     *
668
     * @return string Repaired string
669
     */
670
    private function peelEncodingLayers(string $value, string $from, int $maxDepth): string
25✔
671
    {
672
        $fixed = $value;
25✔
673
        $iterations = 0;
25✔
674
        $options = ['normalize' => false, 'translit' => false, 'ignore' => false];
25✔
675

676
        // Loop while it looks like valid UTF-8
677
        while ($iterations < $maxDepth && $this->isValidUtf8($fixed)) {
25✔
678
            // Attempt to reverse convert (UTF-8 -> $from)
679
            $test = $this->transcodeString($fixed, $from, self::ENCODING_UTF8, $options);
23✔
680

681
            // Break if conversion failed, no change, or result is longer (infinite loop detection)
682
            if (null === $test || $test === $fixed || \strlen($test) >= \strlen($fixed) || !$this->isValidUtf8($test)) {
23✔
683
                break;
23✔
684
            }
685

686
            // If conversion worked AND result is still valid UTF-8 AND result is different
UNCOV
687
            $fixed = $test;
×
UNCOV
688
            $iterations++;
×
689
        }
690

691
        return $fixed;
25✔
692
    }
693

694
    /**
695
     * Normalizes UTF-8 string if needed.
696
     *
697
     * @param string $value String to normalize
698
     * @param string $to Target encoding
699
     * @param array<string, mixed> $options Configuration
700
     *
701
     * @return string Normalized or original string
702
     *
703
     * @codeCoverageIgnore
704
     */
705
    private function normalize(string $value, string $to, array $options): string
706
    {
707
        // Only normalize if: target is UTF-8 AND normalize option is true
708
        if (self::ENCODING_UTF8 !== $to || false === ($options['normalize'] ?? true)) {
709
            return $value;
710
        }
711

712
        if (!\class_exists(Normalizer::class)) {
713
            return $value;
714
        }
715

716
        $normalized = Normalizer::normalize($value);
717

718
        return false !== $normalized ? $normalized : $value;
719
    }
720

721
    /**
722
     * Checks if string is valid UTF-8.
723
     *
724
     * Please note that it uses mb_check_encoding internally,
725
     * and could return true also if it's not really a full utf8 string.
726
     *
727
     * @param string $string String to check
728
     *
729
     * @return bool True if valid UTF-8
730
     */
731
    private function isValidUtf8(string $string): bool
49✔
732
    {
733
        return \mb_check_encoding($string, self::ENCODING_UTF8);
49✔
734
    }
735

736
    /**
     * Validates an encoding name against the whitelist.
     *
     * The comparison is case-insensitive on both sides, so an entry registered
     * through addEncodings() in any letter case can be matched by any other
     * casing of the same name.
     *
     * @param string $encoding Encoding to validate
     * @param string $type Type for error message (e.g., 'source', 'target')
     *
     * @throws InvalidArgumentException If encoding is not allowed
     */
    private function validateEncoding(string $encoding, string $type): void
    {
        // Exact match first: cheapest path, and it covers the built-in constants.
        if (\in_array($encoding, $this->allowedEncodings, true)) {
            return;
        }

        // Fall back to a case-insensitive comparison against every allowed name.
        $needle = \strtoupper($encoding);
        foreach ($this->allowedEncodings as $allowed) {
            if (\strtoupper($allowed) === $needle) {
                return;
            }
        }

        throw new InvalidArgumentException(
            \sprintf(
                'Invalid %s encoding: "%s". Allowed: %s',
                $type,
                $encoding,
                \implode(', ', $this->allowedEncodings)
            )
        );
    }
762

763
    /**
764
     * Builds conversion configuration with defaults.
765
     *
766
     * Merges user options with default values, allowing multiple override layers.
767
     *
768
     * @param array<string, mixed> $options User-provided options
769
     * @param array<string, mixed> ...$replacements Additional override layers
770
     *
771
     * @return array<string, mixed> Merged configuration
772
     *
773
     * @example
774
     * // Basic usage
775
     * $config = self::configureOptions(['normalize' => false]);
776
     *
777
     * // With additional defaults
778
     * $config = self::configureOptions(
779
     *     ['normalize' => false],
780
     *     ['maxDepth' => 10]
781
     * );
782
     */
783
    private function configureOptions(array $options, array ...$replacements): array
82✔
784
    {
785
        $replacements[] = $options;
82✔
786

787
        return \array_replace(
82✔
788
            ['normalize' => true, 'translit' => true, 'ignore' => true, 'encodings' => self::DEFAULT_ENCODINGS],
82✔
789
            ...$replacements
82✔
790
        );
82✔
791
    }
792
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc