• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

un-zero-un / Isocontent / 19885920020

03 Dec 2025 07:29AM UTC coverage: 94.505% (+0.8%) from 93.75%
19885920020

push

gha

344 of 364 relevant lines covered (94.51%)

130.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.83
/src/Parser/DOMParser.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace Isocontent\Parser;
6

7
use Isocontent\AST\Builder;
8
use Isocontent\Exception\UnsupportedFormatException;
9

10
/**
 * A simple HTML parser using DOMDocument / LibXML.
 *
 * The input is loaded with DOMDocument::loadHTML(), then the children of every
 * <body> element are replayed on a Builder: text nodes become text nodes and
 * elements become block nodes whose type is derived from the tag name.
 */
final class DOMParser implements Parser
{
    /**
     * Parses an HTML string and feeds the resulting nodes to the builder.
     *
     * An empty string is a no-op; every other string (including "0") is parsed.
     *
     * @throws UnsupportedFormatException when $input is not a string
     *
     * @psalm-suppress MixedAssignment
     */
    #[\Override]
    public function parse(Builder $builder, mixed $input): void
    {
        if (!is_string($input)) {
            throw new UnsupportedFormatException();
        }

        // Compare against '' explicitly: a truthiness check would also discard "0".
        if ('' === $input) {
            return;
        }

        $document = new \DOMDocument('1.0', 'UTF-8');

        // Collect libxml errors internally while loading, remembering the previous mode.
        $previousUseInternalErrors = libxml_use_internal_errors(true);

        try {
            // The XML encoding prefix is prepended so the markup is read as UTF-8.
            $document->loadHTML('<?xml encoding="UTF-8">'.$input);
        } finally {
            // Always drop the collected errors and restore the caller's error mode,
            // even if loading throws.
            libxml_clear_errors();
            libxml_use_internal_errors($previousUseInternalErrors);
        }

        foreach ($document->getElementsByTagName('body') as $body) {
            assert($body instanceof \DOMElement);

            foreach ($body->childNodes as $childNode) {
                assert($childNode instanceof \DOMNode);
                $this->parseNode($builder, $childNode);
            }
        }
    }

    /**
     * Tells whether this parser handles the given format; only "html" is supported.
     */
    #[\Override]
    public function supportsFormat(string $format): bool
    {
        return 'html' === $format;
    }

    /**
     * Adds a single DOM node, and recursively its children, to the builder.
     *
     * Text nodes are added with every run of two or more whitespace characters
     * collapsed to a single space. Element nodes are added as block nodes and their
     * children are parsed into the builder returned for that block. Any other node
     * type is ignored.
     */
    private function parseNode(Builder $builder, \DOMNode $node): void
    {
        if (XML_TEXT_NODE === $node->nodeType) {
            assert($node instanceof \DOMText);
            // preg_replace() returns null on failure; an empty text node is added then.
            $builder->addTextNode(preg_replace('#\s{2,}#', ' ', $node->textContent) ?? '');

            return;
        }

        if (XML_ELEMENT_NODE !== $node->nodeType) {
            return;
        }

        assert($node instanceof \DOMElement);
        $blockType = $this->parseBlockType($node);
        $childBuilder = $builder->addBlockNode($blockType[0], $blockType[1] ?? []);

        foreach ($node->childNodes as $subNode) {
            assert($subNode instanceof \DOMNode);
            $this->parseNode($childBuilder, $subNode);
        }
    }

    /**
     * Maps an element to its block type and, when relevant, the block arguments.
     *
     * Tag names without a dedicated mapping are reported as "generic".
     *
     * @return array{0: string, 1?: array<string, scalar>}
     */
    private function parseBlockType(\DOMElement $node): array
    {
        return match ($node->nodeName) {
            'h1' => ['title', ['level' => 1]],
            'h2' => ['title', ['level' => 2]],
            'h3' => ['title', ['level' => 3]],
            'h4' => ['title', ['level' => 4]],
            'h5' => ['title', ['level' => 5]],
            'h6' => ['title', ['level' => 6]],
            'p' => ['paragraph'],
            'em' => ['emphasis'],
            'strong' => ['strong'],
            'span' => ['inline_text'],
            'ul' => ['list', ['ordered' => false]],
            'ol' => ['list', ['ordered' => true]],
            'li' => ['list_item'],
            'blockquote' => ['quote'],
            'br' => ['new_line'],
            'a' => ['link', $this->parseLinkArguments($node)],
            'del' => ['stripped'],
            'hr' => ['separator'],
            'sub' => ['subscript'],
            'sup' => ['superscript'],
            'code' => ['code'],
            default => ['generic'],
        };
    }

    /**
     * Builds the arguments of a link block from an anchor element.
     *
     * The "href" key is only present when the attribute exists and is not empty;
     * a value of "0" is kept.
     *
     * @return array<string, scalar>
     */
    private function parseLinkArguments(\DOMElement $node): array
    {
        // getAttribute() yields '' for a missing attribute, so one check covers both cases.
        $href = $node->getAttribute('href');

        return '' === $href ? [] : ['href' => $href];
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc