• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

evolvedbinary / elemental / 982

29 Apr 2025 08:34PM UTC coverage: 56.409% (+0.007%) from 56.402%
982

push

circleci

adamretter
[feature] Improve README.md badges

28451 of 55847 branches covered (50.94%)

Branch coverage included in aggregate %.

77468 of 131924 relevant lines covered (58.72%)

0.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.26
/exist-core/src/main/java/org/exist/dom/persistent/SymbolTable.java
1
/*
2
 * Elemental
3
 * Copyright (C) 2024, Evolved Binary Ltd
4
 *
5
 * admin@evolvedbinary.com
6
 * https://www.evolvedbinary.com | https://www.elemental.xyz
7
 *
8
 * Use of this software is governed by the Business Source License 1.1
9
 * included in the LICENSE file and at www.mariadb.com/bsl11.
10
 *
11
 * Change Date: 2028-04-27
12
 *
13
 * On the date above, in accordance with the Business Source License, use
14
 * of this software will be governed by the Apache License, Version 2.0.
15
 *
16
 * Additional Use Grant: Production use of the Licensed Work for a permitted
17
 * purpose. A Permitted Purpose is any purpose other than a Competing Use.
18
 * A Competing Use means making the Software available to others in a commercial
19
 * product or service that: substitutes for the Software; substitutes for any
20
 * other product or service we offer using the Software that exists as of the
21
 * date we make the Software available; or offers the same or substantially
22
 * similar functionality as the Software.
23
 *
24
 * NOTE: Parts of this file contain code from 'The eXist-db Authors'.
25
 *       The original license header is included below.
26
 *
27
 * =====================================================================
28
 *
29
 * eXist-db Open Source Native XML Database
30
 * Copyright (C) 2001 The eXist-db Authors
31
 *
32
 * info@exist-db.org
33
 * http://www.exist-db.org
34
 *
35
 * This library is free software; you can redistribute it and/or
36
 * modify it under the terms of the GNU Lesser General Public
37
 * License as published by the Free Software Foundation; either
38
 * version 2.1 of the License, or (at your option) any later version.
39
 *
40
 * This library is distributed in the hope that it will be useful,
41
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
43
 * Lesser General Public License for more details.
44
 *
45
 * You should have received a copy of the GNU Lesser General Public
46
 * License along with this library; if not, write to the Free Software
47
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
48
 */
49
package org.exist.dom.persistent;
50

51
import it.unimi.dsi.fastutil.objects.Object2IntMap;
52
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
53
import org.apache.logging.log4j.LogManager;
54
import org.apache.logging.log4j.Logger;
55
import org.exist.EXistException;
56
import org.exist.backup.RawDataBackup;
57
import org.exist.dom.QName;
58
import org.exist.storage.*;
59
import org.exist.storage.io.VariableByteInput;
60
import org.exist.storage.io.VariableByteInputStream;
61
import org.exist.storage.io.VariableByteOutputStream;
62
import org.exist.util.Configuration;
63
import org.exist.util.FileUtils;
64
import org.w3c.dom.Attr;
65
import org.w3c.dom.Element;
66
import org.w3c.dom.Node;
67

68
import java.io.*;
69
import java.nio.file.Files;
70
import java.nio.file.Path;
71
import java.nio.file.StandardOpenOption;
72

73
/**
74
 * Maintains a global symbol table shared by a database instance. The symbol
75
 * table maps namespace URIs and node names to unique, numeric ids. Internally,
76
 * the db does not store node QNames in clear text. Instead, it uses the numeric ids
77
 * maintained here.
78
 *
79
 * The global SymbolTable singleton can be retrieved from {@link org.exist.storage.BrokerPool#getSymbols()}.
80
 * It is saved into the database file "symbols.dbx".
81
 *
82
 * @author <a href="mailto:wolfgang@exist-db.org">Wolfgang Meier</a>
83
 * @author <a href="mailto:adam@evolvedbinary.com">Adam Retter</a>
84
 */
85
public class SymbolTable implements BrokerPoolService, Closeable {
1✔
86

87
    private static final Logger LOG = LogManager.getLogger(SymbolTable.class);
1✔
88

89
    private static final String FILE_NAME = "symbols.dbx";
90

91
    public static final short FILE_FORMAT_VERSION_ID = 8;
92
    public static final short LEGACY_FILE_FORMAT_VERSION_ID = 7;
93

94
    /**
     * The kinds of symbol held by the table. Each kind carries a one-byte
     * type id, which can be mapped back to the constant via {@link #valueOf(byte)}.
     */
    public enum SymbolType {
        NAME((byte) 0),
        NAMESPACE((byte) 1),
        MIMETYPE((byte) 2);

        private final byte typeId;

        SymbolType(final byte typeId) {
            this.typeId = typeId;
        }

        /**
         * @return the one-byte id of this symbol type
         */
        public final byte getTypeId() {
            return typeId;
        }

        /**
         * Looks up the symbol type that carries the given type id.
         *
         * @param typeId the one-byte type id
         * @return the matching symbol type
         * @throws IllegalArgumentException if no symbol type has that id
         */
        public static SymbolType valueOf(final byte typeId) {
            final SymbolType[] candidates = SymbolType.values();
            for (int i = 0; i < candidates.length; i++) {
                if (candidates[i].typeId == typeId) {
                    return candidates[i];
                }
            }
            throw new IllegalArgumentException("No such enumerated value for typeId:" + typeId);
        }
    }
117

118
    public static final int LENGTH_LOCAL_NAME = 2; //sizeof short
119
    public static final int LENGTH_NS_URI = 2; //sizeof short
120

121
    public static final char ATTR_NAME_PREFIX = '@';
1✔
122

123
    protected final SymbolCollection localNameSymbols = new LocalNameSymbolCollection(SymbolType.NAME, 200);
1✔
124
    protected final SymbolCollection namespaceSymbols = new SymbolCollection(SymbolType.NAMESPACE, 200);
1✔
125
    protected final SymbolCollection mimeTypeSymbols = new SymbolCollection(SymbolType.MIMETYPE, 32);
1✔
126

127
    /**
128
     * Temporary name pool to share QName instances during indexing.
129
     */
130
    private final QNamePool namePool = new QNamePool();
1✔
131

132
    /**
133
     * set to true if the symbol table needs to be saved
134
     */
135
    private boolean changed = false;
1✔
136

137
    /**
138
     * the underlying symbols.dbx file
139
     */
140
    private Path file;
141
    private final VariableByteOutputStream outBuffer = new VariableByteOutputStream(256);
1✔
142
    private OutputStream os = null;
1✔
143

144
    @Override
145
    public void configure(final Configuration configuration) {
146
        final Path dataDir = (Path) configuration.getProperty(BrokerPool.PROPERTY_DATA_DIR);
1✔
147
        this.file = dataDir.resolve(getFileName());
1✔
148
    }
1✔
149

150
    @Override
151
    public void prepare(final BrokerPool pool) throws BrokerPoolServiceException {
152
        try {
153
            if (!Files.isReadable(file)) {
1✔
154
                saveSymbols();
1✔
155
            } else {
1✔
156
                loadSymbols();
1✔
157
            }
158
        } catch(final EXistException e) {
1✔
159
            throw new BrokerPoolServiceException(e);
×
160
        }
161
    }
1✔
162

163
    @Override
164
    public void stopSystem(final DBBroker systemBroker) throws BrokerPoolServiceException {
165
        try {
166
            close();
1✔
167
        } catch (final IOException e) {
1✔
168
            throw new BrokerPoolServiceException(e);
×
169
        }
170
    }
1✔
171

172
    public static final String getFileName() {
173
        return FILE_NAME;
1✔
174
    }
175

176
    /**
177
     * Retrieve a shared QName instance from the temporary pool.
178
     *
179
     * TODO: make the namePool thread-local to avoid synchronization.
180
     * @param type qname type
181
     * @param namespaceURI qname namespace uri
182
     * @param localName qname localname
183
     * @param prefix qname prefix
184
     * @return qname from pool
185
     */
186
    public synchronized QName getQName(final short type, final String namespaceURI, final String localName, final String prefix) {
187
        final byte itype = type == Node.ATTRIBUTE_NODE ? ElementValue.ATTRIBUTE : ElementValue.ELEMENT;
1✔
188
        QName qn = namePool.get(itype, namespaceURI, localName, prefix);
1✔
189
        if(qn == null) {
1✔
190
            qn = namePool.add(itype, namespaceURI, localName, prefix);
1✔
191
        }
192
        return qn;
1✔
193
    }
194

195
    /**
196
     * Return a unique id for the local node name of the specified element.
197
     *
198
     * @param element the element to create a unique id for
199
     * @return unique id for the local node name of the specified element.
200
     */
201
    //TODO the (short) cast is nasty - should consider using either short or int end to end
202
    public synchronized short getSymbol(final Element element) {
203
        return (short) localNameSymbols.getId(element.getLocalName());
1✔
204
    }
205

206
    /**
207
     * Return a unique id for the local node name of the specified attribute.
208
     *
209
     * @param attr the attribute to create a unique id for
210
     * @return unique id for the local node name of the specified attribute.
211
     */
212
    //TODO the (short) cast is nasty - should consider using either short or int end to end
213
    public synchronized short getSymbol(final Attr attr) {
214
        final String key = ATTR_NAME_PREFIX + attr.getLocalName();
1✔
215
        return (short) localNameSymbols.getId(key);
1✔
216
    }
217

218
    /**
219
     * Returns a unique id for the specified local name. If the name is
220
     * the local name of an attribute, it should start with a '@' character.
221
     *
222
     * @param name local name
223
     * @return unique id for local name
224
     */
225
    //TODO the (short) cast is nasty - should consider using either short or int end to end
226
    public synchronized short getSymbol(final String name) {
227
        if(name.length() == 0) {
1✔
228
            throw new IllegalArgumentException("name is empty");
1✔
229
        }
230
        return (short) localNameSymbols.getId(name);
1✔
231
    }
232

233
    /**
234
     * Returns a unique id for the specified namespace URI.
235
     *
236
     * @param ns namespace uri
237
     * @return unique id for namespace uri
238
     */
239
    //TODO the (short) cast is nasty - should consider using either short or int end to end
240
    public synchronized short getNSSymbol(final String ns) {
241
        if(ns == null || ns.length() == 0) {
1✔
242
            return 0;
1✔
243
        }
244
        return (short) namespaceSymbols.getId(ns);
1✔
245
    }
246

247
    public synchronized int getMimeTypeId(final String mimeType) {
248
        return mimeTypeSymbols.getId(mimeType);
1✔
249
    }
250

251
    /**
252
     * @return true if the symbol table needs to be saved to persistent storage.
253
     *
254
     */
255
    public synchronized boolean hasChanged() {
256
        return changed;
×
257
    }
258

259
    /**
260
     * Returns the local name registered for the id or
261
     * null if the name is not known.
262
     *
263
     * @param id identifier
264
     * @return the local name registered for the id or null if the name is not known.
265
     */
266
    public synchronized String getName(final short id) {
267
        return localNameSymbols.getSymbol(id);
1✔
268
    }
269

270
    public synchronized String getMimeType(final int id) {
271
        return mimeTypeSymbols.getSymbol(id);
1✔
272
    }
273

274
    /**
275
     * Returns the namespace URI registered for the id or null
276
     * if the namespace URI is not known. Returns the empty string
277
     * if the namespace is empty.
278
     *
279
     * @param id identifier
280
     * @return  the namespace URI registered for the id or null
281
     */
282
    public synchronized String getNamespace(final short id) {
283
        return namespaceSymbols.getSymbol(id);
1✔
284
    }
285

286
    /**
287
     * Write the symbol table to persistent storage. Only called when upgrading
288
     * a .dbx file from previous versions.
289
     *
290
     * @param os outputstream
291
     * @throws IOException in response to an IO error
292
     */
293
    private synchronized void writeAll(final VariableByteOutputStream os) throws IOException {
294
        os.writeFixedInt(FILE_FORMAT_VERSION_ID);
1✔
295
        localNameSymbols.write(os);
1✔
296
        namespaceSymbols.write(os);
1✔
297
        mimeTypeSymbols.write(os);
1✔
298
        changed = false;
1✔
299
    }
1✔
300

301
    /**
302
     * Read the symbol table from disk.
303
     *
304
     * @param is input
305
     * @throws IOException in response to an IO error
306
     */
307
    protected final void read(final VariableByteInput is) throws IOException {
308
        localNameSymbols.clear();
1✔
309
        namespaceSymbols.clear();
1✔
310
        mimeTypeSymbols.clear();
1✔
311
        while(is.available() > 0) {
1✔
312
            readEntry(is);
1✔
313
        }
314
    }
1✔
315

316
    private void readEntry(final VariableByteInput is) throws IOException {
317
        final byte type = is.readByte();
1✔
318
        final int id = is.readInt();
1✔
319
        final String key = is.readUTF();
1✔
320
        //symbol types can be written in any order by SymbolCollection.getById()->SymbolCollection.write()
321
        switch(SymbolType.valueOf(type)) {
1!
322
            case NAME:
323
                localNameSymbols.add(id, key);
1✔
324
                break;
1✔
325
            case NAMESPACE:
326
                namespaceSymbols.add(id, key);
1✔
327
                break;
1✔
328
            case MIMETYPE:
329
                mimeTypeSymbols.add(id, key);
1✔
330
                break;
331
            //Removed default clause
332
        }
333
    }
1✔
334

335
    /**
336
     * Legacy method: read a symbol table written by a previous eXist version.
337
     *
338
     * @param istream input
339
     * @throws IOException in response to an IO error
340
     */
341
    protected final void readLegacy(final VariableByteInput istream) throws IOException {
342
        istream.readShort(); //read max, not needed anymore
1✔
343
        istream.readShort(); //read nsMax not needed anymore
1✔
344
        String key;
345
        short id;
346
        //read local names
347
        int count = istream.readInt();
1✔
348
        for(int i = 0; i < count; i++) {
1✔
349
            key = istream.readUTF();
1✔
350
            id = istream.readShort();
1✔
351
            localNameSymbols.add(id, key);
1✔
352
        }
353
        //read namespaces
354
        count = istream.readInt();
1✔
355
        for(int i = 0; i < count; i++) {
1✔
356
            key = istream.readUTF();
1✔
357
            id = istream.readShort();
1✔
358
            namespaceSymbols.add(id, key);
1✔
359
        }
360
        // default mappings have been removed
361
        // read them for backwards compatibility
362
        count = istream.readInt();
1✔
363
        for(int i = 0; i < count; i++) {
1✔
364
            istream.readUTF();
1✔
365
            istream.readShort();
1✔
366
        }
367
        //read namespaces
368
        count = istream.readInt();
1✔
369
        int mimeId;
370
        for(int i = 0; i < count; i++) {
1✔
371
            key = istream.readUTF();
1✔
372
            mimeId = istream.readInt();
1✔
373
            mimeTypeSymbols.add(mimeId, key);
1✔
374
        }
375
        changed = false;
1✔
376
    }
1✔
377

378
    public final Path getFile() {
379
        return file;
1✔
380
    }
381

382
    /**
383
     * Save the entire symbol table. Will only be called when initializing an
384
     * empty database or when upgrading an older dbx file.
385
     *
386
     * @throws EXistException in response to the error
387
     */
388
    private void saveSymbols() throws EXistException {
389
        try(final VariableByteOutputStream os = new VariableByteOutputStream(8192);
1✔
390
                final OutputStream fos =  new BufferedOutputStream(Files.newOutputStream(getFile()))) {
1✔
391
            writeAll(os);
1✔
392
            fos.write(os.toByteArray());
1✔
393
        } catch(final FileNotFoundException e) {
×
394
            throw new EXistException("File not found: " + this.getFile().toAbsolutePath().toString(), e);
×
395
        } catch(final IOException e) {
×
396
            throw new EXistException("IO error occurred while creating "
×
397
                + this.getFile().toAbsolutePath().toString(), e);
×
398
        }
399
    }
1✔
400

401
    /**
402
     * Read the global symbol table. The global symbol table stores QNames and
403
     * namespace/prefix mappings.
404
     *
405
     * @throws EXistException in response to the error
406
     */
407
    private synchronized void loadSymbols() throws EXistException {
408
        try(final InputStream fis = new BufferedInputStream(Files.newInputStream(getFile()))) {
1✔
409

410
            final VariableByteInput is = new VariableByteInputStream(fis);
1✔
411
            final int magic = is.readFixedInt();
1✔
412
            if(magic == LEGACY_FILE_FORMAT_VERSION_ID) {
1!
413
                LOG.info("Converting legacy symbols.dbx to new format...");
×
414
                readLegacy(is);
×
415
                saveSymbols();
×
416
            } else if(magic != FILE_FORMAT_VERSION_ID) {
1!
417
                throw new EXistException("Symbol table was created by an older" +
×
418
                    "or newer version of eXist" + " (file id: " + magic + "). " +
×
419
                    "To avoid damage, the database will stop.");
420
            } else {
421
                read(is);
1✔
422
            }
423
        } catch(final FileNotFoundException e) {
×
424
            throw new EXistException("Could not read " + this.getFile().toAbsolutePath().toString(), e);
×
425
        } catch(final IOException e) {
×
426
            throw new EXistException("IO error occurred while reading "
×
427
                + this.getFile().toAbsolutePath().toString() + ": " + e.getMessage(), e);
×
428
        }
429
    }
1✔
430

431
    public void backupSymbolsTo(final OutputStream os) throws IOException {
432
        Files.copy(getFile(), os);
1✔
433
    }
1✔
434

435
    public void backupToArchive(final RawDataBackup backup) throws IOException {
436
        // do not use try-with-resources here, closing the OutputStream will close the entire backup
437
        //try(final OutputStream os = backup.newEntry(FileUtils.fileName(getFile()))) {
438
        try {
439
            final OutputStream os = backup.newEntry(FileUtils.fileName(getFile()));
1✔
440
            backupSymbolsTo(os);
1✔
441
        } finally {
1✔
442
            backup.closeEntry();
1✔
443
        }
444
    }
1✔
445

446
    public void flush() throws EXistException {
447
        //Noting to do ? -pb
448
    }
1✔
449

450
    private OutputStream getOutputStream() throws IOException {
451
        if(os == null) {
1✔
452
            os = new BufferedOutputStream(Files.newOutputStream(getFile(), StandardOpenOption.APPEND));
1✔
453
        }
454
        return os;
1✔
455
    }
456

457
    @Override
458
    public void close() throws IOException {
459
        outBuffer.close();
1✔
460
        if(os != null) {
1✔
461
            os.close();
1✔
462
        }
463
    }
1✔
464

465
    /**
466
     * Represents a distinct collection of symbols
467
     *
468
     * @author <a href="mailto:wolfgang@exist-db.org">Wolfgang Meier</a>
469
     * @author <a href="mailto:adam@evolvedbinary.com">Adam Retter</a>
470
     */
471
    protected class SymbolCollection {
472

473
        private final SymbolType symbolType;
474

475
        /**
476
         * Maps mimetype names to an integer id (persisted to disk)
477
         */
478
        private final Object2IntMap<String> symbolsByName;
479

480
        /**
481
         * Maps int ids to mimetype names (transient map for fast reverse lookup of symbolsByName)
482
         */
483
        private String[] symbolsById;
484

485
        /**
486
         * contains the offset of the last symbol
487
         */
488
        protected short offset = 0;
1✔
489

490
        public SymbolCollection(final SymbolType symbolType, final int initialSize) {
1✔
491
            this.symbolType = symbolType;
1✔
492
            symbolsByName = new Object2IntOpenHashMap<>(initialSize);
1✔
493
            symbolsByName.defaultReturnValue(-1);
1✔
494
            symbolsById = new String[initialSize];
1✔
495
        }
1✔
496

497
        private SymbolType getSymbolType() {
498
            return symbolType;
1✔
499
        }
500

501
        private int add(final int id, final String name) {
502
            symbolsById = ensureCapacity(symbolsById, id);
1✔
503
            addSymbolById(id, name);
1✔
504
            addSymbolByName(name, id);
1✔
505
            if(id > offset) {
1✔
506
                offset = (short) id;
1✔
507
            }
508
            return id;
1✔
509
        }
510

511
        protected void addSymbolById(final int id, final String name) {
512
            symbolsById[id] = name;
1✔
513
        }
1✔
514

515
        protected void addSymbolByName(final String name, final int id) {
516
            symbolsByName.put(name, id);
1✔
517
        }
1✔
518

519
        protected String[] ensureCapacity(final String[] array, final int max) {
520
            if(array.length <= max) {
1✔
521
                final String[] newArray = new String[(max * 3) / 2];
1✔
522
                System.arraycopy(array, 0, newArray, 0, array.length);
1✔
523
                return newArray;
1✔
524
            }
525
            return array;
1✔
526
        }
527

528
        private void clear() {
529
            offset = 0;
1✔
530
        }
1✔
531

532
        public synchronized String getSymbol(final int id) {
533
            if(id <= 0 || id > offset) {
1!
534
                return ""; //TODO : raise an exception ? -pb
1✔
535
            }
536
            return symbolsById[id];
1✔
537
        }
538

539
        public synchronized int getId(final String name) {
540
            int id = symbolsByName.getInt(name);
1✔
541
            if(id != -1) {
1✔
542
                return id;
1✔
543
            }
544
            // symbol space exceeded. return -1 to indicate.
545
            if(offset == Short.MAX_VALUE) {
1!
546
                return -1;
×
547
            }
548

549
            id = add(++offset, name);
1✔
550
            //we use "++offset" here instead of "offset++", 
551
            //because the system expects id's to start at 1, not 0
552
            write(id, name);
1✔
553
            changed = true;
1✔
554
            return id;
1✔
555
        }
556

557
        protected final void write(final VariableByteOutputStream os) throws IOException {
558
            for (final String symbol : symbolsByName.keySet()) {
1✔
559
                final int id = symbolsByName.getInt(symbol);
1✔
560
                if (id < 0) {
1!
561
                    LOG.error("Symbol Table: symbolTypeId={}, symbol='{}', id={}", getSymbolType(), symbol, id);
×
562
                    //TODO : raise exception ? -pb
563
                }
564
                writeEntry(id, symbol, os);
1✔
565
            }
566
        }
1✔
567

568
        // Append a new entry to the .dbx file
569
        private void write(final int id, final String key) {
570
            outBuffer.clear();
1✔
571
            try {
572
                writeEntry(id, key, outBuffer);
1✔
573
                getOutputStream().write(outBuffer.toByteArray());
1✔
574
                getOutputStream().flush();
1✔
575
            } catch(final FileNotFoundException e) {
1✔
576
                LOG.error("Symbol table: file not found!", e);
×
577
                //TODO :throw exception -pb
578
            } catch(final IOException e) {
×
579
                LOG.error("Symbol table: caught exception while writing!", e);
×
580
                //TODO : throw exception -pb
581
            }
582
        }
1✔
583

584
        private void writeEntry(final int id, final String key, final VariableByteOutputStream os) throws IOException {
585
            os.writeByte(getSymbolType().getTypeId());
1✔
586
            os.writeInt(id);
1✔
587
            os.writeUTF(key);
1✔
588
        }
1✔
589
    }
590

591
    /**
592
     * Local name storage is used by both element names and attribute names
593
     *
594
     * Attributes behave slightly differently to element names
595
     * For the persistent map symbolsByName, the attribute name is prefixed with
596
     * an '@' symbol to differentiate the attribute name from a similar element name
597
     * However, for the in-memory reverse map symbolsById, the attribute name
598
     * should not be prefixed.
599
     *
600
     * @author <a href="mailto:adam@exist-db.org">Adam Retter</a>
601
     */
602
    private class LocalNameSymbolCollection extends SymbolCollection {
603

604
        public LocalNameSymbolCollection(final SymbolType symbolType, final int initialSize) {
1✔
605
            super(symbolType, initialSize);
1✔
606
        }
1✔
607

608
        @Override
609
        protected void addSymbolById(final int id, final String name) {
610
            /*
611
             For attributes, Don't store '@' in in-memory mapping of id -> attrName
612
             enables faster retrieval
613
             */
614
            if(name.charAt(0) == ATTR_NAME_PREFIX) {
1✔
615
                super.addSymbolById(id, name.substring(1));
1✔
616
            } else {
1✔
617
                super.addSymbolById(id, name);
1✔
618
            }
619
        }
1✔
620
    }
621
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc