• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

un-zero-un / Isocontent / 19885920020

03 Dec 2025 07:29AM UTC coverage: 94.505% (+0.8%) from 93.75%
19885920020

push

gha

344 of 364 relevant lines covered (94.51%)

130.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.83
/src/Parser/DOMParser.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace Isocontent\Parser;
6

7
use Isocontent\AST\Builder;
8
use Isocontent\Exception\FeatureNotAvailableException;
9
use Isocontent\Exception\UnsupportedFormatException;
10

11
/**
 * A simple HTML parser using DOMDocument / LibXML.
 *
 * The HTML string is loaded into a DOMDocument, then the children of every
 * <body> element are walked recursively: text nodes become text nodes on the
 * builder and elements become block nodes whose type is derived from the tag name.
 */
final class DOMParser implements Parser
{
    /**
     * Parses an HTML string and feeds its content to the given builder.
     *
     * An empty string is a no-op: nothing is added to the builder.
     *
     * @param Builder $builder receives the text and block nodes found in the markup
     * @param mixed   $input   the HTML markup; anything other than a string is rejected
     *
     * @throws FeatureNotAvailableException when the DOMDocument class does not exist
     * @throws UnsupportedFormatException   when $input is not a string
     *
     * @psalm-suppress MixedAssignment
     */
    #[\Override]
    public function parse(Builder $builder, mixed $input): void
    {
        if (!class_exists(\DOMDocument::class)) {
            throw new FeatureNotAvailableException(\DOMDocument::class, __CLASS__); // @codeCoverageIgnore
        }

        if (!is_string($input)) {
            throw new UnsupportedFormatException();
        }

        // Compare against '' explicitly: a bare truthiness check would also
        // discard the perfectly valid input "0".
        if ('' === $input) {
            return;
        }

        $document = new \DOMDocument('1.0', 'UTF-8');

        // Collect libxml errors internally while loading; the call returns the
        // previous setting so it can be restored afterwards.
        $previousUseInternalErrors = libxml_use_internal_errors(true);

        try {
            // The XML encoding declaration hints libxml to read the markup as UTF-8.
            /** @var non-empty-string $html */
            $html = '<?xml encoding="UTF-8">'.$input;
            $document->loadHTML($html);
        } finally {
            // Always drop the collected errors and restore the caller's setting,
            // even when loading throws.
            libxml_clear_errors();
            libxml_use_internal_errors($previousUseInternalErrors);
        }

        foreach ($document->getElementsByTagName('body') as $root) {
            assert($root instanceof \DOMElement);

            foreach ($root->childNodes as $childNode) {
                assert($childNode instanceof \DOMNode);
                $this->parseNode($builder, $childNode);
            }
        }
    }

    /**
     * Tells whether this parser handles the given format.
     *
     * @return bool true only for the 'html' format and when the DOMDocument class exists
     */
    #[\Override]
    public function supportsFormat(string $format): bool
    {
        return 'html' === $format && class_exists(\DOMDocument::class);
    }

    /**
     * Converts a DOM node, and recursively its children, into builder calls.
     *
     * Text nodes are added with every run of two or more whitespace characters
     * collapsed into a single space. Element nodes become block nodes; their
     * children are parsed into the builder returned by addBlockNode(). Any other
     * node type is ignored.
     */
    private function parseNode(Builder $builder, \DOMNode $node): void
    {
        if (XML_TEXT_NODE === $node->nodeType) {
            assert($node instanceof \DOMText);
            // preg_replace() returns null on failure; fall back to an empty text.
            $builder->addTextNode(preg_replace('#\s{2,}#', ' ', $node->textContent) ?? '');

            return;
        }

        if (XML_ELEMENT_NODE !== $node->nodeType) {
            return;
        }

        assert($node instanceof \DOMElement);
        $blockType = $this->parseBlockType($node);
        $childBuilder = $builder->addBlockNode($blockType[0], $blockType[1] ?? []);

        foreach ($node->childNodes as $subNode) {
            assert($subNode instanceof \DOMNode);
            $this->parseNode($childBuilder, $subNode);
        }
    }

    /**
     * Maps an element's tag name to a block type and its optional arguments.
     *
     * Unknown tag names map to the 'generic' block type.
     *
     * @return array{0: string, 1?: array<string, scalar>}
     */
    private function parseBlockType(\DOMElement $node): array
    {
        return match ($node->nodeName) {
            'h1' => ['title', ['level' => 1]],
            'h2' => ['title', ['level' => 2]],
            'h3' => ['title', ['level' => 3]],
            'h4' => ['title', ['level' => 4]],
            'h5' => ['title', ['level' => 5]],
            'h6' => ['title', ['level' => 6]],
            'p' => ['paragraph'],
            'em' => ['emphasis'],
            'strong' => ['strong'],
            'span' => ['inline_text'],
            'ul' => ['list', ['ordered' => false]],
            'ol' => ['list', ['ordered' => true]],
            'li' => ['list_item'],
            'blockquote' => ['quote'],
            'br' => ['new_line'],
            'a' => ['link', $this->parseLinkAttributes($node)],
            'del' => ['stripped'],
            'hr' => ['separator'],
            'sub' => ['subscript'],
            'sup' => ['superscript'],
            'code' => ['code'],
            default => ['generic'],
        };
    }

    /**
     * Extracts the link arguments (href, download, rel, target) from an <a> element.
     *
     * Entries with a falsy value (missing attribute, empty value, absent
     * download flag) are removed by array_filter().
     *
     * @return array<string, scalar>
     */
    private function parseLinkAttributes(\DOMElement $node): array
    {
        $nodeAttributes = $node->attributes;
        assert($nodeAttributes instanceof \DOMNamedNodeMap);

        return array_filter([
            'href' => $nodeAttributes->getNamedItem('href')?->nodeValue,
            'download' => (bool) $nodeAttributes->getNamedItem('download'),
            // Read the element's own "rel" attribute, not "target".
            'rel' => $nodeAttributes->getNamedItem('rel')?->nodeValue,
            'target' => $nodeAttributes->getNamedItem('target')?->nodeValue,
        ]);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc