• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

move-elevator / composer-translation-validator / 16266756300

14 Jul 2025 12:24PM UTC coverage: 96.277% (-0.1%) from 96.418%
16266756300

push

github

web-flow
Merge pull request #33 from move-elevator/encoding-validator

feat: add EncodingValidator for file encoding and JSON syntax validation

67 of 72 new or added lines in 2 files covered. (93.06%)

1655 of 1719 relevant lines covered (96.28%)

7.64 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.96
/src/Validator/EncodingValidator.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace MoveElevator\ComposerTranslationValidator\Validator;
6

7
use MoveElevator\ComposerTranslationValidator\Parser\JsonParser;
8
use MoveElevator\ComposerTranslationValidator\Parser\ParserInterface;
9
use MoveElevator\ComposerTranslationValidator\Parser\PhpParser;
10
use MoveElevator\ComposerTranslationValidator\Parser\XliffParser;
11
use MoveElevator\ComposerTranslationValidator\Parser\YamlParser;
12
use MoveElevator\ComposerTranslationValidator\Result\Issue;
13

14
/**
 * Inspects the raw bytes of a translation file for encoding problems.
 *
 * The following checks are run on every file: UTF-8 validity, a leading
 * UTF-8 byte order mark, invisible or control characters, and NFC
 * normalization. Files handled by JsonParser are additionally checked for
 * JSON syntax errors.
 */
class EncodingValidator extends AbstractValidator implements ValidatorInterface
{
    /** Byte sequence of the UTF-8 byte order mark (0xEF 0xBB 0xBF). */
    private const UTF8_BOM = "\xEF\xBB\xBF";

    /**
     * Invisible characters that are reported by name.
     *
     * Keys are the labels used in the issue message, values are the
     * UTF-8 encoded characters searched for in the file content.
     *
     * @var array<string, string>
     */
    private const INVISIBLE_CHARACTERS = [
        'Zero-width space' => "\u{200B}",
        'Zero-width non-joiner' => "\u{200C}",
        'Zero-width joiner' => "\u{200D}",
        'Word joiner' => "\u{2060}",
        'Zero-width no-break space' => "\u{FEFF}",
        'Left-to-right mark' => "\u{200E}",
        'Right-to-left mark' => "\u{200F}",
        'Soft hyphen' => "\u{00AD}",
    ];

    /**
     * Runs all encoding checks against the raw content of the given file.
     *
     * If the file cannot be read, an error is logged (when a logger is
     * available) and an empty array is returned.
     *
     * @return array<string, string> issue key (encoding, bom, invisible_chars,
     *                               unicode_normalization, json_syntax) => message
     */
    public function processFile(ParserInterface $file): array
    {
        $content = file_get_contents($file->getFilePath());
        if (false === $content) {
            $this->logger?->error(
                'Could not read file content: '.$file->getFileName()
            );

            return [];
        }

        $issues = [];

        if (!$this->isValidUtf8($content)) {
            $issues['encoding'] = 'File is not valid UTF-8 encoded';
        }

        if ($this->hasByteOrderMark($content)) {
            $issues['bom'] = 'File contains UTF-8 Byte Order Mark (BOM)';
        }

        // findInvisibleCharacters() yields each label at most once, so the
        // list can be joined without de-duplication.
        $invisibleChars = $this->findInvisibleCharacters($content);
        if ([] !== $invisibleChars) {
            $issues['invisible_chars'] = sprintf(
                'File contains invisible characters: %s',
                implode(', ', $invisibleChars)
            );
        }

        if ($this->hasUnicodeNormalizationIssues($content)) {
            $issues['unicode_normalization'] = 'File contains non-NFC normalized Unicode characters';
        }

        // JSON syntax is only checked for files handled by the JSON parser.
        if ($file instanceof JsonParser && !$this->isValidJsonStructure($content)) {
            $issues['json_syntax'] = 'File contains invalid JSON syntax';
        }

        return $issues;
    }

    /**
     * Renders the details of an issue as one console line per entry.
     *
     * Only entries with a string key and a string message are rendered;
     * the lines are joined with "\n".
     */
    public function formatIssueMessage(Issue $issue, string $prefix = ''): string
    {
        $resultType = $this->resultTypeOnValidationFailure();
        $level = $resultType->toString();
        $color = $resultType->toColorString();

        $messages = [];
        foreach ($issue->getDetails() as $type => $message) {
            if (is_string($type) && is_string($message)) {
                $messages[] = "- <fg=$color>$level</> {$prefix}encoding issue: $message";
            }
        }

        return implode("\n", $messages);
    }

    /**
     * Lists the parser classes this validator can be applied to.
     *
     * @return class-string<ParserInterface>[]
     */
    public function supportsParser(): array
    {
        return [XliffParser::class, YamlParser::class, JsonParser::class, PhpParser::class];
    }

    /**
     * Encoding problems are reported as warnings.
     */
    public function resultTypeOnValidationFailure(): ResultType
    {
        return ResultType::WARNING;
    }

    /**
     * Tells whether the content is a valid UTF-8 byte sequence.
     */
    private function isValidUtf8(string $content): bool
    {
        return mb_check_encoding($content, 'UTF-8');
    }

    /**
     * Tells whether the content starts with the UTF-8 byte order mark.
     */
    private function hasByteOrderMark(string $content): bool
    {
        return str_starts_with($content, self::UTF8_BOM);
    }

    /**
     * Returns the content without a leading UTF-8 byte order mark.
     *
     * Content that does not start with a BOM is returned unchanged.
     */
    private function stripByteOrderMark(string $content): string
    {
        return $this->hasByteOrderMark($content)
            ? substr($content, strlen(self::UTF8_BOM))
            : $content;
    }

    /**
     * Collects the labels of invisible or control characters found in the content.
     *
     * A leading byte order mark is skipped before scanning: U+FEFF encodes to
     * the same bytes as the UTF-8 BOM, so it would otherwise be reported a
     * second time as "Zero-width no-break space" in addition to the dedicated
     * BOM issue. A U+FEFF anywhere else in the content is still reported.
     *
     * @return array<string> labels of the characters found, each at most once
     */
    private function findInvisibleCharacters(string $content): array
    {
        $scanned = $this->stripByteOrderMark($content);
        $problematicChars = [];

        foreach (self::INVISIBLE_CHARACTERS as $name => $char) {
            if (str_contains($scanned, $char)) {
                $problematicChars[] = $name;
            }
        }

        // Control characters other than tab (0x09), line feed (0x0A) and
        // carriage return (0x0D).
        if (1 === preg_match('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', $scanned)) {
            $problematicChars[] = 'Control characters';
        }

        return $problematicChars;
    }

    /**
     * Tells whether the content differs from its NFC-normalized form.
     *
     * Returns false when the Normalizer class (intl extension) is not
     * available or when normalization itself fails.
     */
    private function hasUnicodeNormalizationIssues(string $content): bool
    {
        if (!class_exists(\Normalizer::class)) {
            // Without the intl extension this check is skipped.
            return false;
        }

        $normalized = \Normalizer::normalize($content, \Normalizer::FORM_C);

        return false !== $normalized && $content !== $normalized;
    }

    /**
     * Tells whether the content can be decoded as JSON.
     *
     * A leading byte order mark is removed first so that it is not reported
     * as a syntax error.
     */
    private function isValidJsonStructure(string $content): bool
    {
        try {
            // Throwing on error avoids reading the global json_last_error() state.
            json_decode($this->stripByteOrderMark($content), flags: \JSON_THROW_ON_ERROR);
        } catch (\JsonException) {
            return false;
        }

        return true;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc