• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

evolvedbinary / elemental / 982

29 Apr 2025 08:34PM UTC coverage: 56.409% (+0.007%) from 56.402%
982

push

circleci

adamretter
[feature] Improve README.md badges

28451 of 55847 branches covered (50.94%)

Branch coverage included in aggregate %.

77468 of 131924 relevant lines covered (58.72%)

0.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.71
/exist-core/src/main/java/org/exist/Indexer.java
1
/*
2
 * Elemental
3
 * Copyright (C) 2024, Evolved Binary Ltd
4
 *
5
 * admin@evolvedbinary.com
6
 * https://www.evolvedbinary.com | https://www.elemental.xyz
7
 *
8
 * Use of this software is governed by the Business Source License 1.1
9
 * included in the LICENSE file and at www.mariadb.com/bsl11.
10
 *
11
 * Change Date: 2028-04-27
12
 *
13
 * On the date above, in accordance with the Business Source License, use
14
 * of this software will be governed by the Apache License, Version 2.0.
15
 *
16
 * Additional Use Grant: Production use of the Licensed Work for a permitted
17
 * purpose. A Permitted Purpose is any purpose other than a Competing Use.
18
 * A Competing Use means making the Software available to others in a commercial
19
 * product or service that: substitutes for the Software; substitutes for any
20
 * other product or service we offer using the Software that exists as of the
21
 * date we make the Software available; or offers the same or substantially
22
 * similar functionality as the Software.
23
 *
24
 * NOTE: Parts of this file contain code from 'The eXist-db Authors'.
25
 *       The original license header is included below.
26
 *
27
 * =====================================================================
28
 *
29
 * eXist-db Open Source Native XML Database
30
 * Copyright (C) 2001 The eXist-db Authors
31
 *
32
 * info@exist-db.org
33
 * http://www.exist-db.org
34
 *
35
 * This library is free software; you can redistribute it and/or
36
 * modify it under the terms of the GNU Lesser General Public
37
 * License as published by the Free Software Foundation; either
38
 * version 2.1 of the License, or (at your option) any later version.
39
 *
40
 * This library is distributed in the hope that it will be useful,
41
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
43
 * Lesser General Public License for more details.
44
 *
45
 * You should have received a copy of the GNU Lesser General Public
46
 * License along with this library; if not, write to the Free Software
47
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
48
 */
49
package org.exist;
50

51
import java.util.*;
52

53
import org.apache.logging.log4j.LogManager;
54
import org.apache.logging.log4j.Logger;
55
import org.exist.collections.CollectionConfiguration;
56
import org.exist.dom.QName;
57
import org.exist.dom.persistent.AttrImpl;
58
import org.exist.dom.persistent.CDATASectionImpl;
59
import org.exist.dom.persistent.CommentImpl;
60
import org.exist.dom.persistent.DocumentImpl;
61
import org.exist.dom.persistent.DocumentTypeImpl;
62
import org.exist.dom.persistent.ElementImpl;
63
import org.exist.dom.persistent.NodeHandle;
64
import org.exist.dom.persistent.ProcessingInstructionImpl;
65
import org.exist.dom.persistent.StoredNode;
66
import org.exist.dom.persistent.TextImpl;
67
import org.exist.dom.persistent.XMLDeclarationImpl;
68
import org.exist.indexing.StreamListener;
69
import org.exist.indexing.StreamListener.ReindexMode;
70
import org.exist.storage.DBBroker;
71
import org.exist.storage.IndexSpec;
72
import org.exist.storage.NodePath2;
73
import org.exist.storage.RangeIndexSpec;
74
import org.exist.storage.txn.Txn;
75
import org.exist.util.Configuration;
76
import org.exist.util.ProgressIndicator;
77
import org.exist.util.XMLString;
78
import org.exist.util.pool.NodePool;
79
import org.exist.xquery.Constants;
80
import org.exist.xquery.Expression;
81
import org.exist.xquery.value.StringValue;
82
import org.w3c.dom.DOMException;
83
import org.w3c.dom.Element;
84
import org.w3c.dom.Node;
85
import org.xml.sax.Attributes;
86
import org.xml.sax.ContentHandler;
87
import org.xml.sax.ErrorHandler;
88
import org.xml.sax.Locator;
89
import org.xml.sax.SAXException;
90
import org.xml.sax.SAXParseException;
91
import org.xml.sax.ext.LexicalHandler;
92

93
import javax.annotation.Nullable;
94

95
/**
96
 * Parses a given input document via SAX, stores it to the database and handles
97
 * index-creation.
98
 * 
99
 * @author wolf
100
 */
101
public class Indexer implements ContentHandler, LexicalHandler, ErrorHandler {
102

103
    private static final int CACHE_CHILD_COUNT_MAX = 0x10000;
104

105
    public static final String ATTR_CDATA_TYPE = "CDATA";
106
    public static final String ATTR_ID_TYPE = "ID";
107
    public static final String ATTR_IDREF_TYPE = "IDREF";
108
    public static final String ATTR_IDREFS_TYPE = "IDREFS";
109

110
    private final static Logger LOG = LogManager.getLogger(Indexer.class);
1✔
111

112
    public static final String CONFIGURATION_ELEMENT_NAME = "indexer";
113
    public static final String CONFIGURATION_INDEX_ELEMENT_NAME = "index";
114
    public static final String SUPPRESS_WHITESPACE_ATTRIBUTE = "suppress-whitespace";
115
    public static final String PRESERVE_WS_MIXED_CONTENT_ATTRIBUTE = "preserve-whitespace-mixed-content";
116

117
    public static final String PROPERTY_INDEXER_CONFIG = "indexer.config";
118
    public final static String PROPERTY_SUPPRESS_WHITESPACE = "indexer.suppress-whitespace";
119
    public static final String PROPERTY_PRESERVE_WS_MIXED_CONTENT = "indexer.preserve-whitespace-mixed-content";
1✔
120

121
    private final DBBroker broker;
122
    private final Txn transaction;
123

124
    private StreamListener indexListener;
125

126
    private XMLString charBuf = new XMLString();
1✔
127
    private boolean inCDATASection = false;
1✔
128
    private int currentLine = 0;
1✔
129
    private final NodePath2 currentPath = new NodePath2();
1✔
130

131
    private DocumentImpl document = null;
1✔
132
    private IndexSpec indexSpec = null;
1✔
133

134
    private boolean insideDTD = false;
1✔
135
    private boolean validate = false;
1✔
136
    private int level = 0;
1✔
137
    private Locator locator = null;
1✔
138
    private int normalize = XMLString.SUPPRESS_BOTH;
1✔
139
    private final Map<String, String> nsMappings = new HashMap<>();
1✔
140
    private Element rootNode;
141

142
    private final Deque<ElementImpl> stack = new ArrayDeque<>();
1✔
143
    private final Deque<XMLString> nodeContentStack = new ArrayDeque<>();
1✔
144

145
    private StoredNode prevNode = null;
1✔
146

147
    private String ignorePrefix = null;
1✔
148
    private ProgressIndicator progress;
149

150
    protected boolean preserveWSmixed = false;
1✔
151

152
    protected int docSize = 0;
1✔
153

154
    private enum ProcessTextParent { COMMENT, PI, CDATA_START, ELEMENT_START, ELEMENT_END}
1✔
155

156
    /*
157
     * used to record the number of children of an element during validation
158
     * phase. later, when storing the nodes, we already know the child count and
159
     * don't need to update the element a second time.
160
     */
161
    private int childCnt[] = new int[0x1000];
1✔
162

163
    // the current position in childCnt
164
    private int elementCnt = 0;
1✔
165

166
    // the current nodeFactoryInstanceCnt
167
    private int nodeFactoryInstanceCnt = 0;
1✔
168

169
    // reusable fields
170
    private final TextImpl text = new TextImpl((Expression) null);
1✔
171
    private final Deque<ElementImpl> usedElements = new ArrayDeque<>();
1✔
172

173
    // when storing the document data, validation will be switched off, so
174
    // entities will not be reported. We thus have to cache all needed entities
175
    // during the validation run.
176
    private Map<String, String> entityMap = null;
1✔
177
    private String currentEntityName = null;
1✔
178
    private final XMLString currentEntityValue = new XMLString();
1✔
179
    
180
    /**
     * Create a new parser using the given database broker and transaction to
     * store the document. The whitespace handling options are read from the
     * broker's configuration.
     *
     * @param broker The database broker to use.
     * @param transaction The transaction to use for indexing privileged access to the db.
     *
     * @throws EXistException if an error occurs when constructing the indexer.
     */
    public Indexer(final DBBroker broker, final Txn transaction)
            throws EXistException {
        this.broker = broker;
        this.transaction = transaction;

        // TODO : move the configuration in a dedicated method
        final Configuration config = broker.getConfiguration();

        // "leading", "trailing" and "none" override the initial value of
        // normalize; a missing or any other value leaves it untouched
        final String suppressWS = (String) config.getProperty(PROPERTY_SUPPRESS_WHITESPACE);
        if ("leading".equals(suppressWS)) {
            normalize = XMLString.SUPPRESS_LEADING_WS;
        } else if ("trailing".equals(suppressWS)) {
            normalize = XMLString.SUPPRESS_TRAILING_WS;
        } else if ("none".equals(suppressWS)) {
            normalize = 0;
        }

        final Boolean preserveMixed = (Boolean) config.getProperty(PROPERTY_PRESERVE_WS_MIXED_CONTENT);
        if (preserveMixed != null) {
            preserveWSmixed = preserveMixed;
        }
    }
217

218
    public void setValidating(final boolean validate) {
219
        this.validate = validate;
1✔
220
        if (!validate) {
1✔
221
            this.indexListener = broker.getIndexController()
1✔
222
                .getStreamListener(document, ReindexMode.STORE);
1✔
223
        }
224
    }
1✔
225

226
    /**
227
     * Prepare the indexer for parsing a new document. This will reset the
228
     * internal state of the Indexer object.
229
     * 
230
     * @param doc The document
231
     * @param collectionConfig The configuration of the collection holding the document
232
     */
233
    public void setDocument(final DocumentImpl doc,
234
            final CollectionConfiguration collectionConfig) {
235
        document = doc;
1✔
236
        if (collectionConfig != null) {
1!
237
            indexSpec = collectionConfig.getIndexConfiguration();
1✔
238
        }
239
        // reset internal fields
240
        level = 0;
1✔
241
        currentPath.reset();
1✔
242
        stack.clear();
1✔
243
        docSize = 0;
1✔
244
        nsMappings.clear();
1✔
245
        indexListener = null;
1✔
246
        rootNode = null;
1✔
247
        setPrevious(null);
1✔
248
    }
1✔
249

250
    /**
251
     * Set the document object to be used by this Indexer. This method doesn't
252
     * reset the internal state.
253
     * 
254
     * @param doc The document
255
     */
256
    public void setDocumentObject(final DocumentImpl doc) {
257
        document = doc;
1✔
258
    }
1✔
259

260
    public DocumentImpl getDocument() {
261
        return document;
1✔
262
    }
263

264
    public int getDocSize() {
265
        return docSize;
×
266
    }
267

268
    @Override
269
    public void characters(final char[] ch, final int start, final int length) {
270
        if (length <= 0) {
1!
271
            return;
×
272
        }
273

274
        if (charBuf != null) {
1!
275
            charBuf.append(ch, start, length);
1✔
276
        } else {
1✔
277
            charBuf = new XMLString(ch, start, length);
×
278
        }
279
        if (currentEntityName != null) {
1✔
280
            currentEntityValue.append(ch, start, length);
1✔
281
        }
282
    }
1✔
283

284
    @Override
285
    public void comment(final char[] ch, final int start, final int length) {
286
        if (insideDTD) {
1!
287
            return;
×
288
        }
289
        final CommentImpl comment = new CommentImpl(null, ch, start, length);
1✔
290
        comment.setOwnerDocument(document);
1✔
291
        if (stack.isEmpty()) {
1✔
292
            comment.setNodeId(broker.getBrokerPool().getNodeFactory()
1✔
293
                    .createInstance(nodeFactoryInstanceCnt++));
1✔
294
            if (!validate) {
1✔
295
                broker.storeNode(transaction, comment, currentPath, indexSpec);
1✔
296
            }
297
            document.appendChild((NodeHandle)comment);
1✔
298
        } else {
1✔
299
            final ElementImpl last = stack.peek();
1✔
300
            processText(last, ProcessTextParent.COMMENT);
1✔
301
            last.appendChildInternal(prevNode, comment);
1✔
302
            setPrevious(comment);
1✔
303
            if (!validate) {
1✔
304
                broker.storeNode(transaction, comment, currentPath, indexSpec);
1✔
305
            }
306
        }
307
    }
1✔
308

309
    @Override
310
    public void endCDATA() {
311
        if (!stack.isEmpty()) {
1!
312
            final ElementImpl last = stack.peek();
1✔
313
            if (charBuf != null && charBuf.length() > 0) {
1!
314
                final CDATASectionImpl cdata = new CDATASectionImpl(last.getExpression(), charBuf);
1✔
315
                cdata.setOwnerDocument(document);
1✔
316
                last.appendChildInternal(prevNode, cdata);
1✔
317
                if (!validate) {
1✔
318
                    broker.storeNode(transaction, cdata, currentPath, indexSpec);
1✔
319
                    if (indexListener != null) {
1!
320
                        indexListener.characters(transaction, cdata, currentPath);
1✔
321
                    }
322
                }
323
                setPrevious(cdata);
1✔
324
                if (!nodeContentStack.isEmpty()) {
1!
325
                    for (final XMLString next : nodeContentStack) {
×
326
                        next.append(charBuf);
×
327
                    }
328
                }
329
                charBuf.reset();
1✔
330
            }
331
        }
332
        inCDATASection = false;
1✔
333
    }
1✔
334

335
    @Override
336
    public void endDTD() {
337
        insideDTD = false;
1✔
338
    }
1✔
339

340
    @Override
341
    public void endDocument() {
342
        if (!validate) {
1✔
343
            if(indexListener != null) {
1!
344
                indexListener.endIndexDocument(transaction);
1✔
345
            }
346
            progress.finish();
1✔
347
        }
348
        //LOG.debug("elementCnt = " + childCnt.length);
349
    }
1✔
350

351
    private void processText(ElementImpl last, ProcessTextParent ptp) {
352
        // if (charBuf != null && charBuf.length() > 0) {
353
        //    // remove whitespace if the node has just a single text child,
354
        //    // keep whitespace for mixed content.
355
        //     final XMLString normalized;
356
        //     if ((charBuf.isWhitespaceOnly() && preserveWSmixed) || last.preserveSpace()) {
357
        //         normalized = charBuf;
358
        //     } else {
359
        //         if (last.getChildCount() == 0) {
360
        //            normalized = charBuf.normalize(normalize);
361
        //         } else {
362
        //             normalized = charBuf.isWhitespaceOnly() ? null : charBuf;
363
        //         }
364
        //     }
365
        //     if (normalized != null && normalized.length() > 0) {
366
        //         text.setData(normalized);
367
        //         text.setOwnerDocument(document);
368
        //         last.appendChildInternal(prevNode, text);
369
        //         if (!validate) storeText();
370
        //         setPrevious(text);
371
        //     }
372
        //     charBuf.reset();
373
        // }
374

375
        //from startElement method
376
        if (charBuf != null && charBuf.length() > 0) {
1!
377
            XMLString normalized = null;
1✔
378
            switch (ptp) {
1✔
379
                case COMMENT:
380
                case PI:
381
                case CDATA_START:
382
                    normalized = charBuf;
1✔
383
                    break;
1✔
384
                default:
385
            if (charBuf.isWhitespaceOnly()) {
1✔
386
                if (last.preserveSpace() || last.getChildCount() == 0) {
1✔
387
                    normalized = charBuf;
1✔
388
                } else if (preserveWSmixed) {
1✔
389
                    if (!(last.getChildCount() == 0 && (normalize & XMLString.SUPPRESS_LEADING_WS) != 0)) {
1!
390
                        normalized = charBuf;
1✔
391
                    }
392
                } else {
1✔
393
                    normalized = charBuf.normalize(normalize);
1✔
394
                }
395
            } else {
1✔
396
                //normalized = charBuf;
397
                if (last.preserveSpace()) {
1✔
398
                    normalized = charBuf;
1✔
399
                } else if (last.getChildCount() == 0) {
1✔
400
                    normalized = charBuf.normalize(normalize);
1✔
401
                } else {
1✔
402
                    // mixed element content: don't normalize the text node,
403
                    // just check if there is any text at all
404
                    if (preserveWSmixed) {
1!
405
                        normalized = charBuf;
×
406
                    } else {
×
407
                        if ((normalize & XMLString.SUPPRESS_LEADING_WS) != 0) {
1✔
408
                            normalized = charBuf.normalize(XMLString.SUPPRESS_LEADING_WS | XMLString.COLLAPSE_WS);
1✔
409
                        } else if ((normalize & XMLString.SUPPRESS_TRAILING_WS) != 0) {
1!
410
                            normalized = charBuf.normalize(XMLString.SUPPRESS_TRAILING_WS | XMLString.COLLAPSE_WS);
×
411
                        } else {
×
412
                            //normalized = charBuf.normalize(XMLString.COLLAPSE_WS);
413
                            normalized = charBuf.normalize(normalize);
1✔
414
                        }
415
                    }
416
                }
417

418
            }
419
        }
420
            if (normalized != null) {
1!
421
                text.setData(normalized);
1✔
422
                text.setOwnerDocument(document);
1✔
423
                last.appendChildInternal(prevNode, text);
1✔
424
                if (!validate) storeText();
1✔
425
                setPrevious(text);
1✔
426
            }
427
            charBuf.reset();
1✔
428
        }
429
    }
1✔
430

431
    @Override
432
    public void endElement(final String namespace, final String name, final String qname) {
433
        final ElementImpl last = stack.peek();
1✔
434
        processText(last, ProcessTextParent.ELEMENT_END);
1✔
435
        stack.pop();
1✔
436
        XMLString elemContent = null;
1✔
437
        try {
438
            if (!validate && RangeIndexSpec.hasQNameOrValueIndex(last.getIndexType())) {
1✔
439
                elemContent = nodeContentStack.pop();
1✔
440
            }
441
            if (validate) {
1✔
442
                if (childCnt != null) {
1!
443
                    setChildCount(last);
1✔
444
                }
445
            } else {
1✔
446
                final String content = elemContent == null ? null : elemContent.toString();
1✔
447
                broker.endElement(last, currentPath, content);
1✔
448
                if (childCnt == null && last.getChildCount() > 0
1!
449
                        || (childCnt != null && childCnt[last.getPosition()] != last.getChildCount())) {
1!
450
                    broker.updateNode(transaction, last, false);
×
451
                }
452

453
                if (indexListener != null) {
1!
454
                    indexListener.endElement(transaction, last, currentPath);
1✔
455
                }
456
            }
457
            currentPath.removeLastNode();
1✔
458
            setPrevious(last);
1✔
459
            level--;
1✔
460
        } finally {
1✔
461
            if (elemContent != null) {
1✔
462
                elemContent.reset();
1✔
463
            }
464
        }
465
    }
1✔
466

467
    /**
468
     * @param last The last element
469
     */
470
    private void setChildCount(final ElementImpl last) {
471
        if (last.getPosition() >= childCnt.length) {
1✔
472
            if (childCnt.length > CACHE_CHILD_COUNT_MAX) {
1!
473
                childCnt = null;
×
474
                return;
×
475
            }
476
            final int[] n = new int[childCnt.length * 2];
1✔
477
            System.arraycopy(childCnt, 0, n, 0, childCnt.length);
1✔
478
            childCnt = n;
1✔
479
        }
480
        childCnt[last.getPosition()] = last.getChildCount();
1✔
481
    }
1✔
482

483
    @Override
484
    public void endPrefixMapping(final String prefix) {
485
        if (ignorePrefix != null && prefix.equals(ignorePrefix)) {
1!
486
            ignorePrefix = null;
×
487
        } else {
×
488
            nsMappings.remove(prefix);
1✔
489
        }
490
    }
1✔
491

492
    @Override
493
    public void error(final SAXParseException e) throws SAXException {
494
        final String msg = "error at (" + e.getLineNumber() + "," + e.getColumnNumber() + ") : " + e.getMessage();
1✔
495
        LOG.debug(msg);
1✔
496
        throw new SAXException(msg, e);
1✔
497
    }
498

499
    @Override
500
    public void fatalError(final SAXParseException e) throws SAXException {
501
        final String msg = "fatal error at (" + e.getLineNumber() + "," + e.getColumnNumber() + ") : " + e.getMessage();
1✔
502
        LOG.debug(msg);
1✔
503
        throw new SAXException(msg, e);
1✔
504
    }
505

506
    @Override
507
    public void ignorableWhitespace(final char[] ch, final int start, final int length) {
508
        //Nothing to do
509
    }
1✔
510

511
    @Override
512
    public void processingInstruction(final String target, final String data) {
513
        final ProcessingInstructionImpl pi = new ProcessingInstructionImpl((Expression) null, target, data);
1✔
514
        pi.setOwnerDocument(document);
1✔
515
        if (stack.isEmpty()) {
1✔
516
            pi.setNodeId(broker.getBrokerPool().getNodeFactory().createInstance(nodeFactoryInstanceCnt++));
1✔
517

518
            if (!validate) broker.storeNode(transaction, pi, currentPath, indexSpec);
1✔
519

520
            document.appendChild((NodeHandle)pi);
1✔
521
        } else {
1✔
522
            final ElementImpl last = stack.peek();
1✔
523
            processText(last, ProcessTextParent.PI);
1✔
524
            last.appendChildInternal(prevNode, pi);
1✔
525
            setPrevious(pi);
1✔
526

527
            if (!validate) broker.storeNode(transaction, pi, currentPath, indexSpec);
1✔
528
        }
529
    }
1✔
530

531
    @Override
532
    public void setDocumentLocator(final Locator locator) {
533
        this.locator = locator;
1✔
534
    }
1✔
535

536
    @Override
537
    public void startCDATA() {
538
        if (!stack.isEmpty()) {
1!
539
            processText(stack.peek(), ProcessTextParent.CDATA_START);
1✔
540
        }
541
        inCDATASection = true;
1✔
542
    }
1✔
543

544
    // Methods of interface LexicalHandler
545
    // used to determine Doctype
546

547
    @Override
548
    public void startDTD(final String name, final String publicId, final String systemId) {
549
        final DocumentTypeImpl docType = new DocumentTypeImpl(null, name, publicId, systemId);
1✔
550
        document.setDocumentType(docType);
1✔
551
        insideDTD = true;
1✔
552
    }
1✔
553

554
    @Override
555
    public void startDocument() {
556
        if (!validate) {
1✔
557
            progress = new ProgressIndicator(currentLine, 100);
1✔
558
            document.setChildCount(0);
1✔
559
            elementCnt = 0;
1✔
560
            if(indexListener != null) {
1!
561
                indexListener.startIndexDocument(transaction);
1✔
562
            }
563
        }
564
        docSize = 0;
1✔
565

566
        /* 
567
         * Reset node id count
568
         * 
569
         * We set this to 1 instead of 0 to match the InMemmory serializer which
570
         * considers the Document to be the first node with an id.
571
         */
572
        nodeFactoryInstanceCnt = 1;
1✔
573
    }
1✔
574

575
    @Override
576
    public void declaration(@Nullable final String version, @Nullable final String encoding, @Nullable final String standalone) throws SAXException {
577
        final XMLDeclarationImpl xmlDecl = new XMLDeclarationImpl(version, encoding, standalone);
1✔
578
        document.setXmlDeclaration(xmlDecl);
1✔
579
    }
1✔
580

581
    final boolean hasNormAttribute(final Attributes attributes) {
582
        for (int i = 0; i < attributes.getLength(); i++) {
×
583
            if(attributes.getLocalName(i).equals("norm")) {
×
584
                return true;
×
585
            }
586
        }
587
        return false;
×
588
    }
589

590
    @Override
591
    public void startElement(final String namespace, final String name, final String qname, final Attributes attributes) throws SAXException {
592
        // calculate number of real attributes:
593
        // don't store namespace declarations
594
        int attrLength = attributes.getLength();
1✔
595
        for (int i = 0; i < attributes.getLength(); i++) {
1✔
596
            final String attrNS = attributes.getURI(i);
1✔
597
            final String attrQName = attributes.getQName(i);
1✔
598
            if (attrQName.startsWith("xmlns")
1✔
599
                    || attrNS.equals(Namespaces.EXIST_NS)) {
1!
600
                --attrLength;
1✔
601
            }
602
        }
603

604
        ElementImpl node;
605
        int p = qname.indexOf(':');
1✔
606
        final String prefix = (p != Constants.STRING_NOT_FOUND) ? qname.substring(0, p) : "";
1✔
607
        final QName qn = broker.getBrokerPool().getSymbols().getQName(Node.ELEMENT_NODE, namespace, name, prefix);
1✔
608

609
        if (!stack.isEmpty()) {
1✔
610
            final ElementImpl last = stack.peek();
1✔
611
            processText(last, ProcessTextParent.ELEMENT_START);
1✔
612
            try {
613
                if (!usedElements.isEmpty()) {
1✔
614
                    node = usedElements.pop();
1✔
615
                    node.setNodeName(qn, broker.getBrokerPool().getSymbols());
1✔
616
                } else {
1✔
617
                    node = new ElementImpl((last != null) ? last.getExpression() : null, qn, broker.getBrokerPool().getSymbols());
1!
618
                }
619
            } catch (final DOMException e) {
1✔
620
                throw new SAXException(e.getMessage(), e);
×
621
            }
622
            // copy xml:space setting
623
            node.setPreserveSpace(last.preserveSpace());
1✔
624
            // append the node to its parent
625
            // (computes the node id and updates the parent's child count)
626
            last.appendChildInternal(prevNode, node);
1✔
627
            setPrevious(null);
1✔
628
            node.setOwnerDocument(document);
1✔
629
            node.setAttributes((short) attrLength);
1✔
630
            if (!nsMappings.isEmpty()) {
1✔
631
                node.setNamespaceMappings(nsMappings);
1✔
632
                nsMappings.clear();
1✔
633
            }
634
            stack.push(node);
1✔
635
            currentPath.addNode(node, attributes);
1✔
636
            node.setPosition(elementCnt++);
1✔
637
            if (!validate) {
1✔
638
                if (childCnt != null) {
1!
639
                    node.setChildCount(childCnt[node.getPosition()]);
1✔
640
                }
641
                storeElement(node);
1✔
642
            }
643
        } else {
1✔
644
            try {
645
                node = new ElementImpl(null, qn, broker.getBrokerPool().getSymbols());
1✔
646
            } catch (final DOMException e) {
1✔
647
                throw new SAXException(e.getMessage(), e);
×
648
            }
649
            rootNode = node;
1✔
650
            setPrevious(null);
1✔
651
            node.setOwnerDocument(document);
1✔
652
            node.setNodeId(broker.getBrokerPool().getNodeFactory().createInstance(nodeFactoryInstanceCnt++));
1✔
653
            node.setAttributes((short) attrLength);
1✔
654
            if (!nsMappings.isEmpty()) {
1✔
655
                node.setNamespaceMappings(nsMappings);
1✔
656
                nsMappings.clear();
1✔
657
            }
658
            stack.push(node);
1✔
659
            currentPath.addNode(node, attributes);
1✔
660
            node.setPosition(elementCnt++);
1✔
661
            if (!validate) {
1✔
662
                if (childCnt != null) {
1!
663
                    node.setChildCount(childCnt[node.getPosition()]);
1✔
664
                }
665
                storeElement(node);
1✔
666
            }
667
            document.appendChild((NodeHandle)node);
1✔
668
        }
669
        level++;
1✔
670

671
        for (int i = 0; i < attributes.getLength(); i++) {
1✔
672
            final String attrNS = attributes.getURI(i);
1✔
673
            final String attrLocalName = attributes.getLocalName(i);
1✔
674
            final String attrQName = attributes.getQName(i);
1✔
675
            // skip xmlns-attributes and attributes in eXist's namespace
676
            if (attrQName.startsWith("xmlns") || attrNS.equals(Namespaces.EXIST_NS)) {
1!
677
                --attrLength;
1✔
678
            } else {
1✔
679
                p = attrQName.indexOf(':');
1✔
680
                final String attrPrefix = (p != Constants.STRING_NOT_FOUND) ? attrQName.substring(0, p) : null;
1✔
681
                final AttrImpl attr = (AttrImpl) NodePool.getInstance().borrowNode(Node.ATTRIBUTE_NODE);
1✔
682
                final QName attrQN = broker.getBrokerPool().getSymbols().getQName(Node.ATTRIBUTE_NODE, attrNS, attrLocalName, attrPrefix);
1✔
683
                try {
684
                    attr.setNodeName(attrQN, broker.getBrokerPool().getSymbols());
1✔
685
                } catch (final DOMException e) {
1✔
686
                    throw new SAXException(e.getMessage(), e);
×
687
                }
688
                attr.setValue(attributes.getValue(i));
1✔
689
                attr.setOwnerDocument(document);
1✔
690
                if (attributes.getType(i).equals(ATTR_ID_TYPE)) {
1✔
691
                    attr.setType(AttrImpl.ID);
1✔
692
                } else if (attributes.getType(i).equals(ATTR_IDREF_TYPE)) {
1✔
693
                    attr.setType(AttrImpl.IDREF);
1✔
694
                } else if (attributes.getType(i).equals(ATTR_IDREFS_TYPE)) {
1!
695
                    attr.setType(AttrImpl.IDREFS);
×
696
                } else if (attr.getQName().equals(Namespaces.XML_ID_QNAME)) {
1✔
697
                    // an xml:id attribute. Normalize the attribute and set its
698
                    // type to ID
699
                    attr.setValue(StringValue.trimWhitespace(StringValue.collapseWhitespace(attr.getValue())));
1✔
700

701
                    attr.setType(AttrImpl.ID);
1✔
702
                } else if (attr.getQName().equals(Namespaces.XML_SPACE_QNAME)) {
1✔
703
                    node.setPreserveSpace("preserve".equals(attr.getValue()));
1✔
704
                }
705
                node.appendChildInternal(prevNode, attr);
1✔
706
                setPrevious(attr);
1✔
707
                if (!validate) {
1✔
708
                    broker.storeNode(transaction, attr, currentPath, indexSpec);
1✔
709

710
                    if (indexListener != null) {
1!
711
                        indexListener.attribute(transaction, attr, currentPath);
1✔
712
                    }
713
                }
714
            }
715
        }
716
        if (attrLength > 0) {
1✔
717
            node.setAttributes((short) attrLength);
1✔
718
        }
719

720
        // notify observers about progress every 100 lines
721
        if (locator != null) {
1✔
722
            currentLine = locator.getLineNumber();
1✔
723
            if (!validate) {
1✔
724
                progress.setValue(currentLine);
1✔
725
            }
726
        }
727
        docSize++;
1✔
728
    }
1✔
729

730
    private void storeText() {
731
        if (!nodeContentStack.isEmpty()) {
1✔
732
            for (final XMLString next : nodeContentStack) {
1✔
733
                next.append(charBuf);
1✔
734
            }
735
        }
736
        broker.storeNode(transaction, text, currentPath, indexSpec);
1✔
737

738
        if (indexListener != null) {
1!
739
            indexListener.characters(transaction, text, currentPath);
1✔
740
        }
741
    }
1✔
742

743
    private void storeElement(final ElementImpl node) {
744
        broker.storeNode(transaction, node, currentPath, indexSpec);
1✔
745

746
        if (indexListener != null) {
1!
747
            indexListener.startElement(transaction, node, currentPath);
1✔
748
        }
749

750
        node.setChildCount(0);
1✔
751
        if (RangeIndexSpec.hasQNameOrValueIndex(node.getIndexType())) {
1✔
752
            final XMLString contentBuf = new XMLString();
1✔
753
            nodeContentStack.push(contentBuf);
1✔
754
        }
755
    }
1✔
756

757
    @Override
758
    public void startEntity(final String name) {
759
        // while validating, all entities are put into a map
760
        // to cache them for later use
761
        if (validate) {
1✔
762
            if (entityMap == null) {
1✔
763
                entityMap = new HashMap<>();
1✔
764
            }
765
            currentEntityName = name;
1✔
766
        }
767
    }
1✔
768

769
    @Override
770
    public void endEntity(final String name) {
771
        // store the entity into a map for later
772
        if (validate && currentEntityValue != null) {
1!
773
            entityMap.put(currentEntityName, currentEntityValue.toString());
1✔
774
            currentEntityName = null;
1✔
775
            currentEntityValue.reset();
1✔
776
        }
777
    }
1✔
778

779
    @Override
780
    public void skippedEntity(final String name) {
781
        if (!validate && entityMap != null) {
1!
782
            final String value = entityMap.get(name);
×
783

784
            if (value != null) {
×
785
                characters(value.toCharArray(), 0, value.length());
×
786
            }
787
        }
788
    }
1✔
789

790
    @Override
791
    public void startPrefixMapping(final String prefix, final String uri) {
792
        // skip the eXist namespace
793
        // if (uri.equals(Namespaces.EXIST_NS)) {
794
        // ignorePrefix = prefix;
795
        // return;
796
        // }
797
        nsMappings.put(prefix, uri);
1✔
798
    }
1✔
799

800
    @Override
801
    public void warning(final SAXParseException e) throws SAXException {
802
        final String msg = "warning at (" + e.getLineNumber() + "," + e.getColumnNumber() + ") : " + e.getMessage();
×
803
        throw new SAXException(msg, e);
×
804
    }
805

806
    private void setPrevious(final StoredNode previous) {
807
        if (prevNode != null) {
1✔
808
            switch (prevNode.getNodeType()) {
1✔
809
            case Node.ATTRIBUTE_NODE:
810
                prevNode.release();
1✔
811
                break;
1✔
812
            case Node.ELEMENT_NODE:
813
                if (prevNode != rootNode) {
1!
814
                    prevNode.clear();
1✔
815
                    usedElements.push((ElementImpl) prevNode);
1✔
816
                }
817
                break;
1✔
818
            case Node.TEXT_NODE:
819
                prevNode.clear();
1✔
820
                break;
821
            }
822
        }
823
        prevNode = previous;
1✔
824
    }
1✔
825
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc