• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

brick / structured-data / 21717372495

05 Feb 2026 03:25PM UTC coverage: 77.67%. Remained the same
21717372495

Pull #8

github

web-flow
Merge b2d49f595 into 251e970ec
Pull Request #8: Fix: array @type parsing - iterate over $type not $types

0 of 2 new or added lines in 1 file covered. (0.0%)

1 existing line in 1 file now uncovered.

240 of 309 relevant lines covered (77.67%)

1.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.22
/src/Reader/JsonLdReader.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace Brick\StructuredData\Reader;
6

7
use Brick\StructuredData\Item;
8
use Brick\StructuredData\Reader;
9
use DOMDocument;
10
use DOMNode;
11
use DOMXPath;
12
use Override;
13
use Sabre\Uri\InvalidUriException;
14
use stdClass;
15

16
use function array_filter;
17
use function array_map;
18
use function array_merge;
19
use function array_values;
20
use function array_walk_recursive;
21
use function in_array;
22
use function is_array;
23
use function is_bool;
24
use function is_object;
25
use function is_scalar;
26
use function is_string;
27
use function iterator_to_array;
28
use function json_decode;
29
use function Sabre\Uri\build;
30
use function Sabre\Uri\parse;
31
use function Sabre\Uri\resolve;
32

33
use const JSON_THROW_ON_ERROR;
34

35
/**
36
 * Reads JSON-LD documents embedded into a HTML document.
37
 *
38
 * This first implementation is a rudimentary parser that only implements a subset of the JSON-LD spec, only allows a
39
 * string in `@context`, and considers this string a vocabulary identifier and not an external context file.
40
 *
41
 * This may look like it's missing a lot (it is), like it will make mistakes (it will), but this should be enough to
42
 * parse most of the web pages embedding schema.org data, as long as they follow the simple syntax used in the examples.
43
 *
44
 * https://json-ld.org/spec/latest/json-ld/
45
 */
46
final class JsonLdReader implements Reader
47
{
48
    /**
     * Creates a reader configured with the properties whose values are IRIs.
     *
     * Because this naive implementation cannot parse contexts, it accepts a hardcoded list of properties whose values
     * will be considered as IRIs and resolved relative to the current URL.
     *
     * Example: ['http://schema.org/image', 'http://schema.org/url']
     *
     * @param string[] $iriProperties Fully resolved property names whose string values are resolved against the
     *                                document URL.
     */
    public function __construct(
        private readonly array $iriProperties = [],
    ) {
    }
67

68
    /**
     * Reads the items of every `<script type="application/ld+json">` element found in the document.
     *
     * @param DOMDocument $document The document to scan for embedded JSON-LD.
     * @param string      $url      The URL the document was retrieved from, for relative URL resolution.
     *
     * @return Item[] The items of all script elements, in document order; empty when there is no such element.
     */
    #[Override]
    public function read(DOMDocument $document, string $url): array
    {
        $scripts = iterator_to_array(
            (new DOMXPath($document))->query('//script[@type="application/ld+json"]'),
        );

        $itemsPerScript = [];

        foreach ($scripts as $script) {
            $itemsPerScript[] = $this->readJson($script->textContent, $url);
        }

        // Each script contributes its own list; concatenate them into a single flat list.
        return $itemsPerScript === [] ? [] : array_merge(...$itemsPerScript);
    }
87

88
    /**
     * Reads a list of items from a JSON-LD string.
     *
     * If the JSON is not valid, an empty array is returned.
     *
     * A top-level object is read as a single item, unless it has an array `@graph` member, in which case every
     * object of that array is read as an item. A top-level array is handled like a `@graph` array. Any other
     * top-level value (scalar or null) yields an empty array.
     *
     * @param string $json The JSON string.
     * @param string $url  The URL the document was retrieved from, for relative URL resolution.
     *
     * @return Item[]
     */
    private function readJson(string $json, string $url): array
    {
        try {
            $data = json_decode($json, flags: JSON_THROW_ON_ERROR);
        } catch (\JsonException) {
            // JSON_THROW_ON_ERROR reports malformed JSON by throwing; convert that back into the documented
            // "empty array" result so that one broken script block does not abort reading the whole document.
            return [];
        }

        if (is_object($data)) {
            if (! isset($data->{'@graph'}) || ! is_array($data->{'@graph'})) {
                return [$this->readItem($data, $url, null)];
            }

            $data = $data->{'@graph'};
        }

        if (! is_array($data)) {
            // Scalars and a literal JSON null carry no items.
            return [];
        }

        $items = [];

        foreach ($data as $entry) {
            // Only objects can describe an item; anything else in the list is skipped.
            if (is_object($entry)) {
                $items[] = $this->readItem($entry, $url, null);
            }
        }

        return $items;
    }
130

131
    /**
     * Reads a single item.
     *
     * @param stdClass    $item       A decoded JSON object representing an item.
     * @param string      $url        The URL the document was retrieved from, for relative URL resolution.
     * @param string|null $vocabulary The current vocabulary URL, if any.
     */
    private function readItem(stdClass $item, string $url, ?string $vocabulary): Item
    {
        $context = $item->{'@context'} ?? null;

        if (is_string($context)) {
            // A string @context replaces the inherited vocabulary, even when it is rejected (null).
            $vocabulary = $this->checkVocabularyUrl($context);
        }

        $id = null;
        $rawId = $item->{'@id'} ?? null;

        if (is_string($rawId)) {
            try {
                // Always relative to the document URL, no support for @base.
                $id = resolve($url, $rawId);
            } catch (InvalidUriException) {
                // An @id that cannot be resolved leaves the item without an identifier.
            }
        }

        $types = [];
        $rawType = $item->{'@type'} ?? null;

        if (is_string($rawType)) {
            $types[] = $this->resolveTerm($rawType, $vocabulary);
        } elseif (is_array($rawType)) {
            foreach ($rawType as $candidate) {
                if (! is_string($candidate)) {
                    continue;
                }

                $resolved = $this->resolveTerm($candidate, $vocabulary);

                // Same entries a plain array_filter() would drop: the empty string and '0'.
                if ($resolved !== '' && $resolved !== '0') {
                    $types[] = $resolved;
                }
            }
        }

        $result = new Item($id, ...$types);

        foreach ($item as $name => $value) {
            // Skip the empty key and JSON-LD keywords such as @context, @id and @type.
            if ($name === '' || $name[0] === '@') {
                continue;
            }

            $property = $this->resolveTerm($name, $vocabulary);

            // Flatten arrays: not sure if this is required by the JSON-LD standard, but some websites output
            // nested arrays such as "offer": [[ { ... } ]], and Google Structured Data Testing Tool does recognize
            // this syntax, so we're doing the same here. A non-array value is handled as a one-element list.
            $candidates = is_array($value) ? $this->flattenArray($value) : [$value];

            foreach ($candidates as $candidate) {
                $propertyValue = $this->getPropertyValue($property, $candidate, $url, $vocabulary);

                if ($propertyValue !== null) {
                    $result->addProperty($property, $propertyValue);
                }
            }
        }

        return $result;
    }
206

207
    /**
     * Flattens a potentially multidimensional array.
     *
     * Leaf values are collected in traversal order into a new list; the result contains no nested arrays.
     *
     * @param array $array The array to flatten.
     *
     * @return array The list of leaf values.
     */
    private function flattenArray(array $array): array
    {
        $leaves = [];

        $collect = static function (mixed $leaf) use (&$leaves): void {
            $leaves[] = $leaf;
        };

        // array_walk_recursive() descends into nested arrays and only hands the leaves to the callback.
        array_walk_recursive($array, $collect);

        return $leaves;
    }
222

223
    /**
     * Expands a term against the current vocabulary.
     *
     * @param string      $term       The term, such as a type or a property name.
     * @param string|null $vocabulary The current vocabulary URL, if any.
     *
     * @return string The vocabulary URL followed by the term, or the term unchanged when there is no vocabulary.
     */
    private function resolveTerm(string $term, ?string $vocabulary): string
    {
        return $vocabulary === null ? $term : $vocabulary . $term;
    }
231

232
    /**
     * Converts a decoded JSON value into a property value.
     *
     * Strings of properties listed in the IRI properties are resolved against the document URL first; booleans
     * become 'true' or 'false', other scalars are cast to string, and objects are read as nested items.
     *
     * @param string      $name       The property name.
     * @param mixed       $value      The property value. Any JSON type.
     * @param string      $url        The URL the document was retrieved from, for relative URL resolution.
     * @param string|null $vocabulary The current vocabulary URL, if any.
     *
     * @return Item|string|null The value, or NULL if the input value is NULL or an array.
     */
    private function getPropertyValue(string $name, mixed $value, string $url, ?string $vocabulary): null|Item|string
    {
        if (is_string($value) && in_array($name, $this->iriProperties, true)) {
            try {
                $value = resolve($url, $value);
            } catch (InvalidUriException) {
                // Keep the original string when it cannot be resolved.
            }
        }

        // Booleans are checked before the generic scalar cast, which would turn them into '1' and ''.
        return match (true) {
            is_bool($value) => $value ? 'true' : 'false',
            is_scalar($value) => (string) $value,
            is_object($value) => $this->readItem($value, $url, $vocabulary),
            default => null,
        };
    }
266

267
    /**
     * Ensures that the vocabulary URL is a valid absolute URL, and ensures that it has a path.
     *
     * Example: http://schema.org would return http://schema.org/
     *
     * @param string $url The vocabulary URL to check.
     *
     * @return string|null An absolute URL, or null if the input is not valid.
     */
    private function checkVocabularyUrl(string $url): ?string
    {
        try {
            $components = parse($url);
        } catch (InvalidUriException) {
            return null;
        }

        // An absolute URL needs both a scheme and a host.
        if ($components['scheme'] === null || $components['host'] === null) {
            return null;
        }

        // Default to the root path so that terms can be appended directly to the vocabulary.
        $components['path'] ??= '/';

        return build($components);
    }
296
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc