• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

move-elevator / composer-translation-validator / 16568299985

28 Jul 2025 11:53AM UTC coverage: 96.664% (-0.2%) from 96.835%
16568299985

push

github

web-flow
Merge pull request #50 from move-elevator/fix-key-naming-enum

fix: improve error handling in EncodingValidator and update expected_convention in tests

6 of 6 new or added lines in 2 files covered. (100.0%)

4 existing lines in 1 file now uncovered.

2173 of 2248 relevant lines covered (96.66%)

8.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.11
/src/Validator/EncodingValidator.php
1
<?php
2

3
declare(strict_types=1);
4

5
/*
6
 * This file is part of the Composer plugin "composer-translation-validator".
7
 *
8
 * Copyright (C) 2025 Konrad Michalik <km@move-elevator.de>
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation, either version 3 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
22
 */
23

24
namespace MoveElevator\ComposerTranslationValidator\Validator;
25

26
use MoveElevator\ComposerTranslationValidator\Parser\JsonParser;
27
use MoveElevator\ComposerTranslationValidator\Parser\ParserInterface;
28
use MoveElevator\ComposerTranslationValidator\Parser\PhpParser;
29
use MoveElevator\ComposerTranslationValidator\Parser\XliffParser;
30
use MoveElevator\ComposerTranslationValidator\Parser\YamlParser;
31
use MoveElevator\ComposerTranslationValidator\Result\Issue;
32
use Normalizer;
33

34
/**
 * Validates the raw byte content of a translation file for encoding problems.
 *
 * The following checks are performed, in order:
 *  1. the content is valid UTF-8 (all other checks are skipped if it is not),
 *  2. the content does not start with a UTF-8 Byte Order Mark,
 *  3. the content contains no invisible or control characters,
 *  4. the content is NFC normalized (only when the intl Normalizer class exists).
 */
class EncodingValidator extends AbstractValidator implements ValidatorInterface
{
    /**
     * UTF-8 Byte Order Mark: U+FEFF encoded as the bytes 0xEF 0xBB 0xBF.
     */
    private const UTF8_BOM = "\xEF\xBB\xBF";

    /**
     * Control bytes other than tab (0x09), line feed (0x0A) and carriage return (0x0D).
     *
     * The pattern is applied byte-wise (no "u" modifier). That is safe on valid
     * UTF-8 input because bytes below 0x80 never occur inside a multi-byte sequence.
     */
    private const CONTROL_CHARACTER_PATTERN = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/';

    /**
     * Invisible Unicode characters that are reported, mapped to their display name.
     */
    private const INVISIBLE_CHARACTERS = [
        "\u{200B}" => 'Zero-width space',
        "\u{200C}" => 'Zero-width non-joiner',
        "\u{200D}" => 'Zero-width joiner',
        "\u{2060}" => 'Word joiner',
        "\u{FEFF}" => 'Zero-width no-break space',
        "\u{200E}" => 'Left-to-right mark',
        "\u{200F}" => 'Right-to-left mark',
        "\u{00AD}" => 'Soft hyphen',
    ];

    /**
     * Reads the file behind the given parser and collects encoding issues.
     *
     * A missing or unreadable file is logged as an error and yields no issues.
     * An empty file yields no issues either.
     *
     * @return array<string, string> issue messages keyed by issue type
     *                               ("encoding", "bom", "invisible_chars", "unicode_normalization")
     */
    public function processFile(ParserInterface $file): array
    {
        $filePath = $file->getFilePath();

        if (!file_exists($filePath)) {
            $this->logger?->error(
                'File does not exist: '.$file->getFileName(),
            );

            return [];
        }

        // Read raw file content
        $content = file_get_contents($filePath);
        if (false === $content) {
            $this->logger?->error(
                'Could not read file content: '.$file->getFileName(),
            );

            return [];
        }

        // Early exit for empty files
        if ('' === $content) {
            return [];
        }

        // Check UTF-8 encoding first - the remaining checks assume valid UTF-8
        if (!$this->isValidUtf8($content)) {
            return ['encoding' => 'File is not valid UTF-8 encoded'];
        }

        $issues = [];

        // Check for BOM (fast byte check)
        $contentWithoutBom = $content;
        if ($this->hasByteOrderMark($content)) {
            $issues['bom'] = 'File contains UTF-8 Byte Order Mark (BOM)';

            // The BOM is U+FEFF, which is also listed as an invisible character.
            // Skip it here so a leading BOM is reported once, as "bom", and not
            // a second time as "Zero-width no-break space". A U+FEFF anywhere
            // else in the file is still reported by the invisible-character scan.
            $contentWithoutBom = substr($content, strlen(self::UTF8_BOM));
        }

        // Check for invisible/problematic characters
        $invisibleChars = $this->findInvisibleCharacters($contentWithoutBom);
        if ([] !== $invisibleChars) {
            $issues['invisible_chars'] = sprintf(
                'File contains invisible characters: %s',
                implode(', ', $invisibleChars),
            );
        }

        // Check Unicode normalization (expensive, only if intl available)
        if ($this->hasUnicodeNormalizationIssues($content)) {
            $issues['unicode_normalization'] = 'File contains non-NFC normalized Unicode characters';
        }

        // Note: JSON syntax validation is handled by JsonParser constructor
        // Invalid JSON files will throw exceptions before reaching this validator

        return $issues;
    }

    /**
     * Renders one console line per issue detail, joined by newlines.
     *
     * Only details with a string key and a string message are rendered.
     *
     * @param string $prefix text inserted directly before "encoding issue"
     */
    public function formatIssueMessage(Issue $issue, string $prefix = ''): string
    {
        $details = $issue->getDetails();
        $resultType = $this->resultTypeOnValidationFailure();

        $level = $resultType->toString();
        $color = $resultType->toColorString();

        $messages = [];
        foreach ($details as $type => $message) {
            if (is_string($type) && is_string($message)) {
                $messages[] = "- <fg=$color>$level</> {$prefix}encoding issue: $message";
            }
        }

        return implode("\n", $messages);
    }

    /**
     * @return class-string<ParserInterface>[]
     */
    public function supportsParser(): array
    {
        return [XliffParser::class, YamlParser::class, JsonParser::class, PhpParser::class];
    }

    /**
     * Encoding problems are reported as warnings.
     */
    public function resultTypeOnValidationFailure(): ResultType
    {
        return ResultType::WARNING;
    }

    /**
     * Tells whether the content is a well-formed UTF-8 byte sequence.
     */
    private function isValidUtf8(string $content): bool
    {
        return mb_check_encoding($content, 'UTF-8');
    }

    /**
     * Tells whether the content starts with the UTF-8 Byte Order Mark.
     */
    private function hasByteOrderMark(string $content): bool
    {
        return str_starts_with($content, self::UTF8_BOM);
    }

    /**
     * Lists the names of invisible and control characters found in the content.
     *
     * Invisible Unicode characters come first, in the order of
     * INVISIBLE_CHARACTERS, followed by "Control characters" if any control
     * byte is present. Each name appears at most once.
     *
     * @return array<string>
     */
    private function findInvisibleCharacters(string $content): array
    {
        $problematicChars = [];

        // Every character in the map is multi-byte in UTF-8, so pure ASCII
        // content cannot contain any of them and the scan can be skipped.
        if (!mb_check_encoding($content, 'ASCII')) {
            foreach (self::INVISIBLE_CHARACTERS as $char => $name) {
                if (str_contains($content, $char)) {
                    $problematicChars[] = $name;
                }
            }
        }

        // Check for control characters (except allowed whitespace)
        if (1 === preg_match(self::CONTROL_CHARACTER_PATTERN, $content)) {
            $problematicChars[] = 'Control characters';
        }

        return $problematicChars;
    }

    /**
     * Tells whether the content differs from its NFC normalized form.
     *
     * Returns false when the Normalizer class is unavailable or when
     * normalization itself fails, so neither case is reported as an issue.
     */
    private function hasUnicodeNormalizationIssues(string $content): bool
    {
        if (!class_exists('Normalizer')) {
            return false;
        }

        $normalized = Normalizer::normalize($content, Normalizer::FORM_C);

        return false !== $normalized && $content !== $normalized;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc