• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

move-elevator / composer-translation-validator / 18559927341

16 Oct 2025 11:35AM UTC coverage: 95.519%. Remained the same
18559927341

Pull #73

github

jackd248
build: add php-cs-fixer-preset
Pull Request #73: build: add php-cs-fixer-preset

206 of 210 new or added lines in 16 files covered. (98.1%)

91 existing lines in 20 files now uncovered.

2345 of 2455 relevant lines covered (95.52%)

7.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.11
/src/Validator/EncodingValidator.php
1
<?php
2

3
declare(strict_types=1);
4

5
/*
6
 * This file is part of the "composer-translation-validator" Composer plugin.
7
 *
8
 * (c) 2025 Konrad Michalik <km@move-elevator.de>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13

14
namespace MoveElevator\ComposerTranslationValidator\Validator;
15

16
use MoveElevator\ComposerTranslationValidator\Parser\{JsonParser, ParserInterface, PhpParser, XliffParser, YamlParser};
17
use MoveElevator\ComposerTranslationValidator\Result\Issue;
18
use Normalizer;
19

20
use function is_string;
21
use function sprintf;
22

23
/**
24
 * EncodingValidator.
25
 *
26
 * @author Konrad Michalik <km@move-elevator.de>
27
 * @license GPL-3.0-or-later
28
 */
29
class EncodingValidator extends AbstractValidator implements ValidatorInterface
30
{
31
    /**
     * Inspect the raw bytes of a translation file for encoding problems.
     *
     * Checks run in this order: UTF-8 validity, leading byte order mark,
     * invisible/control characters, Unicode NFC normalization. A file that
     * does not exist, cannot be read, or is empty yields no issues; a file
     * that is not valid UTF-8 yields only the "encoding" issue.
     *
     * @return array<string, string> issue messages keyed by issue type
     *                               ("encoding", "bom", "invisible_chars",
     *                               "unicode_normalization")
     */
    public function processFile(ParserInterface $file): array
    {
        $path = $file->getFilePath();

        if (!file_exists($path)) {
            $this->logger?->error('File does not exist: '.$file->getFileName());

            return [];
        }

        // Work on the raw bytes, not on the parsed representation.
        $raw = file_get_contents($path);
        if (false === $raw) {
            $this->logger?->error('Could not read file content: '.$file->getFileName());

            return [];
        }

        // An empty file has nothing to validate.
        if ('' === $raw) {
            return [];
        }

        // The remaining checks may fail on invalid UTF-8, so report that alone and stop.
        if (!$this->isValidUtf8($raw)) {
            return ['encoding' => 'File is not valid UTF-8 encoded'];
        }

        $found = [];

        // Cheap byte comparison at the start of the content.
        if ($this->hasByteOrderMark($raw)) {
            $found['bom'] = 'File contains UTF-8 Byte Order Mark (BOM)';
        }

        // Every distinct kind of invisible character is listed once in the message.
        $invisible = $this->findInvisibleCharacters($raw);
        if ([] !== $invisible) {
            $kinds = implode(', ', array_unique($invisible));
            $found['invisible_chars'] = sprintf('File contains invisible characters: %s', $kinds);
        }

        // The most expensive check runs last; it is skipped when intl is unavailable.
        if ($this->hasUnicodeNormalizationIssues($raw)) {
            $found['unicode_normalization'] = 'File contains non-NFC normalized Unicode characters';
        }

        // Note: JSON syntax validation is handled by the JsonParser constructor;
        // invalid JSON files throw before they reach this validator.

        return $found;
    }
95

96
    /**
     * Render the details of an issue as console output, one line per message.
     *
     * Only detail entries whose key and value are both strings are rendered;
     * everything else is skipped. Each line carries the level and color of
     * the result type this validator reports on failure.
     *
     * @param Issue  $issue  issue whose details are rendered
     * @param string $prefix text inserted directly before "encoding issue"
     *
     * @return string the rendered lines joined by "\n" (empty when nothing was rendered)
     */
    public function formatIssueMessage(Issue $issue, string $prefix = ''): string
    {
        $entries = $issue->getDetails();
        $severity = $this->resultTypeOnValidationFailure();

        $level = $severity->toString();
        $color = $severity->toColorString();

        $lines = [];
        foreach ($entries as $key => $message) {
            // Ignore list-style or non-textual detail entries.
            if (!is_string($key) || !is_string($message)) {
                continue;
            }

            $lines[] = "- <fg=$color>$level</> {$prefix}encoding issue: $message";
        }

        return implode("\n", $lines);
    }
113

114
    /**
     * List the parser classes whose files this validator can check.
     *
     * @return class-string<ParserInterface>[]
     */
    public function supportsParser(): array
    {
        $supported = [
            XliffParser::class,
            YamlParser::class,
            JsonParser::class,
            PhpParser::class,
        ];

        return $supported;
    }
121

122
    /**
     * Result type reported when this validator finds an issue: a warning.
     */
    public function resultTypeOnValidationFailure(): ResultType
    {
        $severity = ResultType::WARNING;

        return $severity;
    }
126

127
    /**
     * Tell whether the given bytes form a valid UTF-8 string.
     */
    private function isValidUtf8(string $content): bool
    {
        $isUtf8 = mb_check_encoding($content, 'UTF-8');

        return $isUtf8;
    }
131

132
    /**
     * Tell whether the content starts with the UTF-8 byte order mark.
     */
    private function hasByteOrderMark(string $content): bool
    {
        // The UTF-8 BOM is the three bytes 0xEF 0xBB 0xBF; compare exactly that prefix.
        $bom = "\xEF\xBB\xBF";

        return 0 === strncmp($content, $bom, 3);
    }
137

138
    /**
     * Collect human-readable names of invisible or control characters
     * present in the content.
     *
     * Named Unicode characters come first, in the order of the lookup table
     * below; "Control characters" is appended last when any control byte
     * other than tab, line feed and carriage return occurs.
     *
     * Note: the UTF-8 byte order mark is the encoding of U+FEFF, so content
     * that starts with a BOM also matches the "Zero-width no-break space"
     * entry.
     *
     * @return array<string>
     */
    private function findInvisibleCharacters(string $content): array
    {
        $found = [];

        // Pure ASCII content cannot contain any of the multi-byte characters
        // below, so the substring scans are skipped for it (performance only).
        if (!mb_check_encoding($content, 'ASCII')) {
            $namedCharacters = [
                "\u{200B}" => 'Zero-width space',
                "\u{200C}" => 'Zero-width non-joiner',
                "\u{200D}" => 'Zero-width joiner',
                "\u{2060}" => 'Word joiner',
                "\u{FEFF}" => 'Zero-width no-break space',
                "\u{200E}" => 'Left-to-right mark',
                "\u{200F}" => 'Right-to-left mark',
                "\u{00AD}" => 'Soft hyphen',
            ];

            foreach ($namedCharacters as $sequence => $label) {
                if (str_contains($content, $sequence)) {
                    $found[] = $label;
                }
            }
        }

        // Control bytes are checked for ASCII and non-ASCII content alike;
        // \x09 (tab), \x0A (LF) and \x0D (CR) are the allowed whitespace.
        if (1 === preg_match('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', $content)) {
            $found[] = 'Control characters';
        }

        return $found;
    }
180

181
    /**
     * Tell whether the content differs from its NFC-normalized form.
     *
     * Returns false when the Normalizer class (intl extension) is not
     * available or when normalization itself fails, so in both cases no
     * issue is reported.
     */
    private function hasUnicodeNormalizationIssues(string $content): bool
    {
        if (!class_exists('Normalizer')) {
            return false;
        }

        $nfc = Normalizer::normalize($content, Normalizer::FORM_C);
        if (false === $nfc) {
            return false;
        }

        return $nfc !== $content;
    }
191
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc