• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

move-elevator / composer-translation-validator / 16267221707

14 Jul 2025 12:45PM UTC coverage: 96.161% (-0.1%) from 96.277%
16267221707

Pull #34

github

jackd248
feat: add symbol whitelist for Normalizer in composer.json
Pull Request #34: feat: enhance EncodingValidator with performance optimizations

18 of 19 new or added lines in 1 file covered. (94.74%)

2 existing lines in 1 file now uncovered.

1653 of 1719 relevant lines covered (96.16%)

7.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.14
/src/Validator/EncodingValidator.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace MoveElevator\ComposerTranslationValidator\Validator;
6

7
use MoveElevator\ComposerTranslationValidator\Parser\JsonParser;
8
use MoveElevator\ComposerTranslationValidator\Parser\ParserInterface;
9
use MoveElevator\ComposerTranslationValidator\Parser\PhpParser;
10
use MoveElevator\ComposerTranslationValidator\Parser\XliffParser;
11
use MoveElevator\ComposerTranslationValidator\Parser\YamlParser;
12
use MoveElevator\ComposerTranslationValidator\Result\Issue;
13

14
/**
 * Validates the raw, byte-level encoding of a translation file.
 *
 * For each file the validator can report: invalid UTF-8, a leading UTF-8
 * byte order mark, invisible or control characters, and content that is not
 * in Unicode Normalization Form C.
 */
class EncodingValidator extends AbstractValidator implements ValidatorInterface
{
    /**
     * Byte sequence of a UTF-8 byte order mark (U+FEFF encoded as UTF-8).
     */
    private const UTF8_BOM = "\xEF\xBB\xBF";

    /**
     * Matches ASCII control bytes except tab (0x09), line feed (0x0A) and
     * carriage return (0x0D); DEL (0x7F) is included.
     */
    private const CONTROL_CHARACTER_PATTERN = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/';

    /**
     * Invisible code points that are reported, mapped to their display name.
     */
    private const INVISIBLE_CHARACTERS = [
        "\u{200B}" => 'Zero-width space',
        "\u{200C}" => 'Zero-width non-joiner',
        "\u{200D}" => 'Zero-width joiner',
        "\u{2060}" => 'Word joiner',
        "\u{FEFF}" => 'Zero-width no-break space',
        "\u{200E}" => 'Left-to-right mark',
        "\u{200F}" => 'Right-to-left mark',
        "\u{00AD}" => 'Soft hyphen',
    ];

    /**
     * Runs all encoding checks against the raw content of the given file.
     *
     * Returns an empty array when the file cannot be read (an error is logged
     * if a logger is set) or when it is empty. When the content is not valid
     * UTF-8 only the `encoding` issue is returned, because the remaining
     * checks operate on UTF-8 text.
     *
     * @return array<string, string> issue messages keyed by `encoding`, `bom`,
     *                               `invisible_chars` or `unicode_normalization`
     */
    public function processFile(ParserInterface $file): array
    {
        $content = file_get_contents($file->getFilePath());
        if (false === $content) {
            $this->logger?->error(
                'Could not read file content: '.$file->getFileName()
            );

            return [];
        }

        // Nothing to validate in an empty file.
        if ('' === $content) {
            return [];
        }

        // Every later check assumes well-formed UTF-8, so stop here if it is not.
        if (!$this->isValidUtf8($content)) {
            return ['encoding' => 'File is not valid UTF-8 encoded'];
        }

        $issues = [];

        $hasBom = $this->hasByteOrderMark($content);
        if ($hasBom) {
            $issues['bom'] = 'File contains UTF-8 Byte Order Mark (BOM)';
        }

        // The leading BOM is the same byte sequence as U+FEFF. It has already
        // been reported above, so it is excluded from the invisible-character
        // scan to avoid flagging one character twice. A U+FEFF anywhere else
        // in the file is still reported.
        $scanTarget = $hasBom ? substr($content, strlen(self::UTF8_BOM)) : $content;

        $invisibleChars = $this->findInvisibleCharacters($scanTarget);
        if ([] !== $invisibleChars) {
            $issues['invisible_chars'] = sprintf(
                'File contains invisible characters: %s',
                implode(', ', $invisibleChars)
            );
        }

        if ($this->hasUnicodeNormalizationIssues($content)) {
            $issues['unicode_normalization'] = 'File contains non-NFC normalized Unicode characters';
        }

        // Note: JSON syntax validation is handled by JsonParser constructor.
        // Invalid JSON files will throw exceptions before reaching this validator.

        return $issues;
    }

    /**
     * Renders every string-keyed, string-valued detail of the issue as one
     * console line, joined by newlines.
     *
     * @param string $prefix text inserted directly before "encoding issue"
     */
    public function formatIssueMessage(Issue $issue, string $prefix = ''): string
    {
        $resultType = $this->resultTypeOnValidationFailure();
        $level = $resultType->toString();
        $color = $resultType->toColorString();

        $lines = [];
        foreach ($issue->getDetails() as $type => $message) {
            if (!is_string($type) || !is_string($message)) {
                continue;
            }

            $lines[] = "- <fg={$color}>{$level}</> {$prefix}encoding issue: {$message}";
        }

        return implode("\n", $lines);
    }

    /**
     * @return class-string<ParserInterface>[] parser classes this validator handles
     */
    public function supportsParser(): array
    {
        return [XliffParser::class, YamlParser::class, JsonParser::class, PhpParser::class];
    }

    /**
     * Encoding problems are reported as warnings.
     */
    public function resultTypeOnValidationFailure(): ResultType
    {
        return ResultType::WARNING;
    }

    /**
     * Tells whether the given bytes form a valid UTF-8 string.
     */
    private function isValidUtf8(string $content): bool
    {
        return mb_check_encoding($content, 'UTF-8');
    }

    /**
     * Tells whether the content starts with the UTF-8 byte order mark.
     */
    private function hasByteOrderMark(string $content): bool
    {
        return str_starts_with($content, self::UTF8_BOM);
    }

    /**
     * Collects the names of invisible or control characters found in the content.
     *
     * @return array<string> display names in lookup-table order, followed by
     *                       'Control characters' when any control byte is present
     */
    private function findInvisibleCharacters(string $content): array
    {
        $found = [];

        // All table entries are multi-byte sequences, so pure ASCII content
        // cannot contain any of them and the lookups are skipped.
        if (!mb_check_encoding($content, 'ASCII')) {
            foreach (self::INVISIBLE_CHARACTERS as $char => $name) {
                if (str_contains($content, $char)) {
                    $found[] = $name;
                }
            }
        }

        // The pattern is byte-based on purpose: the bytes 0x00-0x1F and 0x7F
        // never occur inside a UTF-8 multi-byte sequence.
        if (1 === preg_match(self::CONTROL_CHARACTER_PATTERN, $content)) {
            $found[] = 'Control characters';
        }

        return $found;
    }

    /**
     * Tells whether the content differs from its NFC-normalized form.
     *
     * Returns false when the intl Normalizer class is unavailable or when
     * normalization itself fails.
     */
    private function hasUnicodeNormalizationIssues(string $content): bool
    {
        if (!class_exists(\Normalizer::class)) {
            return false;
        }

        // Fast path: content already in NFC needs no normalized copy.
        if (\Normalizer::isNormalized($content, \Normalizer::FORM_C)) {
            return false;
        }

        // isNormalized() also returns false on errors, so confirm with an
        // actual normalization before reporting a difference.
        $normalized = \Normalizer::normalize($content, \Normalizer::FORM_C);

        return false !== $normalized && $content !== $normalized;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc