• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

evolvedbinary / elemental / 982

29 Apr 2025 08:34PM UTC coverage: 56.409% (+0.007%) from 56.402%
982

push

circleci

adamretter
[feature] Improve README.md badges

28451 of 55847 branches covered (50.94%)

Branch coverage included in aggregate %.

77468 of 131924 relevant lines covered (58.72%)

0.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

47.1
/exist-core/src/main/java/org/exist/util/Collations.java
1
/*
2
 * Elemental
3
 * Copyright (C) 2024, Evolved Binary Ltd
4
 *
5
 * admin@evolvedbinary.com
6
 * https://www.evolvedbinary.com | https://www.elemental.xyz
7
 *
8
 * Use of this software is governed by the Business Source License 1.1
9
 * included in the LICENSE file and at www.mariadb.com/bsl11.
10
 *
11
 * Change Date: 2028-04-27
12
 *
13
 * On the date above, in accordance with the Business Source License, use
14
 * of this software will be governed by the Apache License, Version 2.0.
15
 *
16
 * Additional Use Grant: Production use of the Licensed Work for a permitted
17
 * purpose. A Permitted Purpose is any purpose other than a Competing Use.
18
 * A Competing Use means making the Software available to others in a commercial
19
 * product or service that: substitutes for the Software; substitutes for any
20
 * other product or service we offer using the Software that exists as of the
21
 * date we make the Software available; or offers the same or substantially
22
 * similar functionality as the Software.
23
 *
24
 * NOTE: Parts of this file contain code from 'The eXist-db Authors'.
25
 *       The original license header is included below.
26
 *
27
 * =====================================================================
28
 *
29
 * eXist-db Open Source Native XML Database
30
 * Copyright (C) 2001 The eXist-db Authors
31
 *
32
 * info@exist-db.org
33
 * http://www.exist-db.org
34
 *
35
 * This library is free software; you can redistribute it and/or
36
 * modify it under the terms of the GNU Lesser General Public
37
 * License as published by the Free Software Foundation; either
38
 * version 2.1 of the License, or (at your option) any later version.
39
 *
40
 * This library is distributed in the hope that it will be useful,
41
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
43
 * Lesser General Public License for more details.
44
 *
45
 * You should have received a copy of the GNU Lesser General Public
46
 * License along with this library; if not, write to the Free Software
47
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
48
 */
49
package org.exist.util;
50

51
import java.net.URI;
52
import java.net.URISyntaxException;
53
import java.text.StringCharacterIterator;
54
import java.util.Arrays;
55
import java.util.Comparator;
56
import java.util.List;
57
import java.util.StringTokenizer;
58
import java.util.concurrent.atomic.AtomicReference;
59
import java.util.stream.Collectors;
60

61
import com.ibm.icu.text.*;
62
import com.ibm.icu.util.ULocale;
63
import com.ibm.icu.util.VersionInfo;
64
import org.apache.logging.log4j.LogManager;
65
import org.apache.logging.log4j.Logger;
66
import org.exist.xquery.ErrorCodes;
67
import org.exist.xquery.Expression;
68
import org.exist.xquery.XPathException;
69

70
import javax.annotation.Nullable;
71

72
/**
73
 * Utility methods dealing with collations.
74
 *
75
 * @author wolf
76
 * @author <a href="mailto:adam@evolvedbinary.com">Adam Retter</a>
77
 */
78
public class Collations {
×
79

80
    private final static Logger logger = LogManager.getLogger(Collations.class);
1✔
81

82
    /**
83
     * The default Unicode Codepoint Collation URI as defined by the XQuery
84
     * spec.
85
     */
86
    public final static String UNICODE_CODEPOINT_COLLATION_URI = "http://www.w3.org/2005/xpath-functions/collation/codepoint";
87

88
    /**
89
     * Short string to select the default codepoint collation
90
     */
91
    public final static String CODEPOINT_SHORT = "codepoint";
92

93
    /**
94
     * The UCA (Unicode Collation Algorithm) Codepoint URI as defined by the XQuery
95
     * spec.
96
     */
97
    public final static String UCA_COLLATION_URI = "http://www.w3.org/2013/collation/UCA";
98

99

100
    /**
101
     * The HTML ASCII Case-Insensitive Collation as defined by the XPath F&amp;O spec.
102
     */
103
    public final static String HTML_ASCII_CASE_INSENSITIVE_COLLATION_URI = "http://www.w3.org/2005/xpath-functions/collation/html-ascii-case-insensitive";
104

105
    /**
106
     * The XQTS ASCII Case-blind Collation as defined by the XQTS 3.1.
107
     */
108
    public final static String XQTS_ASCII_CASE_BLIND_COLLATION_URI = "http://www.w3.org/2010/09/qt-fots-catalog/collation/caseblind";
109

110
    /**
111
     * The URI used to select collations in eXist.
112
     */
113
    public final static String EXIST_COLLATION_URI = "http://exist-db.org/collation";
114

115
    /**
116
     * Lazy-initialized singleton Html Ascii Case Insensitive Collator
117
     */
118
    private final static AtomicReference<Collator> htmlAsciiCaseInsensitiveCollator = new AtomicReference<>();
1✔
119

120
    /**
121
     * Lazy-initialized singleton XQTS Case Blind Collator
122
     */
123
    private final static AtomicReference<Collator> xqtsAsciiCaseBlindCollator = new AtomicReference<>();
1✔
124

125
    /**
126
     * Lazy-initialized singleton Samisk Collator
127
     */
128
    private final static AtomicReference<Collator> samiskCollator = new AtomicReference<>();
1✔
129

130
    /**
131
     * Get a {@link Comparator}from the specified URI.
132
     *
133
     * The original code is from saxon (@linkplain http://saxon.sf.net).
134
     *
135
     * 
136
     * @param uri The URI describing the collation and settings
137
     *
138
     * @return The Collator for the URI, or null.
139
     *
140
     * @throws XPathException If an error occurs whilst constructing the Collator
141
     */
142
    public static @Nullable Collator getCollationFromURI(final String uri) throws XPathException {
143
        return getCollationFromURI(uri, (Expression)null);
×
144
    }
145

146
    /**
147
     * Get a {@link Comparator}from the specified URI.
148
     *
149
     * The original code is from saxon (@linkplain http://saxon.sf.net).
150
     *
151
     *
152
     * @param uri The URI describing the collation and settings
153
     * @param errorCode the error code if the URI cannot be resolved
154
     *
155
     * @return The Collator for the URI, or null.
156
     *
157
     * @throws XPathException If an error occurs whilst constructing the Collator
158
     */
159
    public static @Nullable Collator getCollationFromURI(final String uri, final ErrorCodes.ErrorCode errorCode) throws XPathException {
160
        return getCollationFromURI(uri, null, errorCode);
1✔
161
    }
162

163
    /**
164
     * Get a {@link Comparator}from the specified URI.
165
     *
166
     * The original code is from saxon (@linkplain http://saxon.sf.net).
167
     *
168
     * 
169
     * @param uri The URI describing the collation and settings
170
     * @param expression The expression from which the collation derives
171
     *
172
     * @return The Collator for the URI, or null.
173
     *
174
     * @throws XPathException If an error occurs whilst constructing the Collator
175
     */
176
    public static @Nullable Collator getCollationFromURI(final String uri, @Nullable final Expression expression) throws XPathException {
177
        return getCollationFromURI(uri, expression, ErrorCodes.XQST0076);
1✔
178
    }
179

180
    /**
181
     * Get a {@link Comparator}from the specified URI.
182
     *
183
     * The original code is from saxon (@linkplain http://saxon.sf.net).
184
     *
185
     *
186
     * @param uri The URI describing the collation and settings
187
     * @param expression The expression from which the collation derives
188
     * @param errorCode the error code if the URI cannot be resolved
189
     *
190
     * @return The Collator for the URI, or null.
191
     *
192
     * @throws XPathException If an error occurs whilst constructing the Collator
193
     */
194
    public static @Nullable Collator getCollationFromURI(final String uri, @Nullable final Expression expression, final ErrorCodes.ErrorCode errorCode) throws XPathException {
195
        final Collator collator;
196

197
        if (uri.startsWith(EXIST_COLLATION_URI) || uri.startsWith(UCA_COLLATION_URI) || uri.startsWith("?")) {
1✔
198
            URI u;
199
            try {
200
                u = new URI(uri);
1✔
201
            } catch (final URISyntaxException e) {
1✔
202
                return null;
×
203
            }
204

205
            final String query = u.getQuery();
1✔
206
            if (query == null) {
1✔
207
                collator = Collator.getInstance();
1✔
208

209
            } else {
1✔
210

211
                boolean fallback = true;                // default is "yes"
1✔
212
                String lang = null;
1✔
213
                String version = null;
1✔
214
                String strength = null;
1✔
215
                String maxVariable = "punct";           // default is punct
1✔
216
                String alternate = "non-ignorable";     // default is non-ignorable
1✔
217
                boolean backwards = false;              // default is "no"
1✔
218
                boolean normalization = false;          // default is "no"
1✔
219
                boolean caseLevel = false;              // default is "no"
1✔
220
                String caseFirst = null;
1✔
221
                boolean numeric = false;                // default is "no"
1✔
222
                String reorder = null;
1✔
223
                String decomposition = null;
1✔
224

225
                final StringTokenizer queryTokenizer = new StringTokenizer(query, ";&");
1✔
226
                while (queryTokenizer.hasMoreElements()) {
1✔
227
                    final String param = queryTokenizer.nextToken();
1✔
228
                    final int eq = param.indexOf('=');
1✔
229
                    if (eq > 0) {
1!
230
                        final String kw = param.substring(0, eq);
1✔
231
                        if (kw != null) {
1!
232
                            final String val = param.substring(eq + 1);
1✔
233

234
                            switch (kw) {
1!
235
                                case "fallback":
236
                                    fallback = "yes".equals(val);
×
237
                                    break;
×
238

239
                                case "lang":
240
                                    lang = val;
1✔
241
                                    break;
1✔
242

243
                                case "version":
244
                                    version = val;
×
245
                                    break;
×
246

247
                                case "strength":
248
                                    strength = val;
1✔
249
                                    break;
1✔
250

251
                                case "maxVariable":
252
                                    maxVariable = val;
×
253
                                    break;
×
254

255
                                case "alternate":
256
                                    alternate = val;
×
257
                                    break;
×
258

259
                                case "backwards":
260
                                    backwards = "yes".equals(val);
×
261
                                    break;
×
262

263
                                case "normalization":
264
                                    normalization = "yes".equals(val);
×
265
                                    break;
×
266

267
                                case "caseLevel":
268
                                    caseLevel = "yes".equals(val);
×
269
                                    break;
×
270

271
                                case "caseFirst":
272
                                    caseFirst = val;
×
273
                                    break;
×
274

275
                                case "numeric":
276
                                    numeric = "yes".equals(val);
×
277
                                    break;
×
278

279
                                case "reorder":
280
                                    reorder = val;
×
281
                                    break;
×
282

283
                                case "decomposition":
284
                                    decomposition = val;
×
285
                                    break;
×
286

287
                                default:
288
                                    logger.warn("Unrecognized Collation parameter: {}", kw);
×
289
                                    break;
290
                            }
291
                        }
292
                    }
293
                }
294

295
                collator = getCollationFromParams(fallback, lang, version,
1✔
296
                        strength, maxVariable, alternate, backwards,
1✔
297
                        normalization, caseLevel, caseFirst, numeric,
1✔
298
                        reorder, decomposition, expression);
1✔
299
            }
300
        } else if(HTML_ASCII_CASE_INSENSITIVE_COLLATION_URI.equals(uri)) {
1✔
301
            try {
302
                collator = getHtmlAsciiCaseInsensitiveCollator();
1✔
303
            } catch (final Exception e) {
1✔
304
                throw new XPathException(expression, "Unable to instantiate HTML ASCII Case Insensitive Collator: " + e.getMessage(), e);
×
305
            }
306
        } else if(XQTS_ASCII_CASE_BLIND_COLLATION_URI.equals(uri)) {
1!
307
            try {
308
                collator = getXqtsAsciiCaseBlindCollator();
×
309
            } catch (final Exception e) {
×
310
                throw new XPathException(expression, "Unable to instantiate XQTS ASCII Case Blind Collator: " + e.getMessage(), e);
×
311
            }
312
        } else if (uri.startsWith("java:")) {
1!
313
            // java class specified: this should be a subclass of
314
            // com.ibm.icu.text.RuleBasedCollator
315
            final String uriClassName = uri.substring("java:".length());
×
316
            try {
317
                final Class<?> collatorClass = Class.forName(uriClassName);
×
318
                if (!Collator.class.isAssignableFrom(collatorClass)) {
×
319
                    final String msg = "The specified collator class '" + collatorClass.getName() + "' is not a subclass of com.ibm.icu.text.Collator";
×
320
                    logger.error(msg);
×
321
                    throw new XPathException(expression, ErrorCodes.FOCH0002, msg);
×
322
                }
323
                collator = (Collator) collatorClass.newInstance();
×
324
            } catch (final Exception e) {
×
325
                final String msg = "The specified collator class " + uriClassName + " could not be found";
×
326
                logger.error(msg);
×
327
                throw new XPathException(expression, ErrorCodes.FOCH0002, msg, e);
×
328
            }
329
        } else if (UNICODE_CODEPOINT_COLLATION_URI.equals(uri)) {
1✔
330
            collator = null;
1✔
331
        } else {
1✔
332
            final String msg = "Unknown collation : '" + uri + "'";
1✔
333
            logger.error(msg);
1✔
334
            throw new XPathException(expression, errorCode, msg);
1✔
335
        }
336

337
        if (collator != null) {
1✔
338
            // make immutable and therefore thread-safe!
339
            collator.freeze();
1✔
340
        }
341

342
        return collator;
1✔
343
    }
344

345
    /**
346
     * Determines if the two strings are equal with regards to a Collation.
347
     *
348
     * @param collator The collation, or null if no collation should be used.
349
     * @param s1 The first string to compare against the second.
350
     * @param s2 The second string to compare against the first.
351
     *
352
     * @return true if the Strings are equal.
353
     */
354
    public static boolean equals(@Nullable final Collator collator, final String s1, final String s2) {
355
        if (collator == null) {
×
356
            return s1.equals(s2);
×
357
        } else {
358
            return collator.equals(s1, s2);
×
359
        }
360
    }
361

362
    /**
363
     * Compares two strings with regards to a Collation.
364
     *
365
     * @param collator The collation, or null if no collation should be used.
366
     * @param s1 The first string to compare against the second.
367
     * @param s2 The second string to compare against the first.
368
     *
369
     * @return a negative integer, zero, or a positive integer if the
370
     *     {@code s1} is less than, equal to, or greater than {@code s2}.
371
     *
372
     * @throws UnsupportedOperationException if ICU4J does not support collation
373
     */
374
    public static int compare(@Nullable final Collator collator, final String s1,final  String s2) {
375
        if (collator == null) {
1✔
376
            return s1 == null ? (s2 == null ? 0 : -1) : s1.compareTo(s2);
1!
377
        } else {
378
            return collator.compare(s1, s2);
1✔
379
        }
380
    }
381

382
    /**
383
     * Determines if one string starts with another with regards to a Collation.
384
     *
385
     * @param collator The collation, or null if no collation should be used.
386
     * @param s1 The first string to compare against the second.
387
     * @param s2 The second string to compare against the first.
388
     *
389
     * @return true if {@code s1} starts with {@code @s2}.
390
     *
391
     * @throws UnsupportedOperationException if ICU4J does not support collation
392
     */
393
    public static boolean startsWith(@Nullable final Collator collator, final String s1, final String s2) {
394
        if (collator == null) {
1✔
395
            return s1.startsWith(s2);
1✔
396
        } else {
397
            if (s2.isEmpty()) {
1!
398
                return true;
×
399
            } else if (s1.isEmpty()) {
1✔
400
                return false;
1✔
401
            } else {
402
                final SearchIterator searchIterator =
1✔
403
                        new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
1✔
404
                return searchIterator.first() == 0;
1!
405
            }
406
        }
407
    }
408

409
    /**
410
     * Determines if one string ends with another with regards to a Collation.
411
     *
412
     * @param collator The collation, or null if no collation should be used.
413
     * @param s1 The first string to compare against the second.
414
     * @param s2 The second string to compare against the first.
415
     *
416
     * @return true if {@code s1} ends with {@code @s2}.
417
     *
418
     * @throws UnsupportedOperationException if ICU4J does not support collation
419
     */
420
    public static boolean endsWith(@Nullable final Collator collator, final String s1, final String s2) {
421
        if (collator == null) {
1✔
422
            return s1.endsWith(s2);
1✔
423
        } else {
424
            if (s2.isEmpty()) {
1!
425
                return true;
×
426
            } else if (s1.isEmpty()) {
1✔
427
                return false;
1✔
428
            } else {
429
                final SearchIterator searchIterator =
1✔
430
                        new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
1✔
431
                int lastPos = SearchIterator.DONE;
1✔
432
                int lastLen = 0;
1✔
433
                for (int pos = searchIterator.first(); pos != SearchIterator.DONE;
1✔
434
                     pos = searchIterator.next()) {
1✔
435
                    lastPos = pos;
1✔
436
                    lastLen = searchIterator.getMatchLength();
1✔
437
                }
438

439
                return lastPos > SearchIterator.DONE && lastPos + lastLen == s1.length();
1!
440
            }
441
        }
442
    }
443

444
    /**
445
     * Determines if one string contains another with regards to a Collation.
446
     *
447
     * @param collator The collation, or null if no collation should be used.
448
     * @param s1 The first string to compare against the second.
449
     * @param s2 The second string to compare against the first.
450
     *
451
     * @return true if {@code s1} contains {@code @s2}.
452
     *
453
     * @throws UnsupportedOperationException if ICU4J does not support collation
454
     */
455
    public static boolean contains(@Nullable final Collator collator, final String s1, final String s2) {
456
        if (collator == null) {
1✔
457
            return s1.contains(s2);
1✔
458
        } else {
459
            if (s2.isEmpty()) {
1!
460
                return true;
×
461
            } else if (s1.isEmpty()) {
1✔
462
                return false;
1✔
463
            } else {
464
                final SearchIterator searchIterator =
1✔
465
                        new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
1✔
466
                return searchIterator.first() >= 0;
1✔
467
            }
468
        }
469
    }
470

471
    /**
472
     * Finds the index of one string within another string with regards to a Collation.
473
     *
474
     * @param collator The collation, or null if no collation should be used.
475
     * @param s1 The string to look for {@code s2} in
476
     * @param s2 The substring to look for in {@code s1}.
477
     *
478
     * @return the index of the first occurrence of the specified substring,
479
     *          or {@code -1} if there is no such occurrence.
480
     */
481
    public static int indexOf(@Nullable final Collator collator, final String s1, final String s2) {
482
        if (collator == null) {
1✔
483
            return s1.indexOf(s2);
1✔
484
        } else {
485
            if (s2.isEmpty()) {
1!
486
                return 0;
×
487
            } else if (s1.isEmpty()) {
1!
488
                return -1;
1✔
489
            } else {
490
                final SearchIterator searchIterator =
×
491
                        new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
×
492
                return searchIterator.first();
×
493
            }
494
        }
495
    }
496

497
    /**
498
     * Get a Collator with the provided settings.
499
     *
500
     * @param fallback Determines whether the processor uses a fallback
501
     *     collation if a conformant collation is not available.
502
     * @param lang language code: a string in the lexical space of xs:language.
503
     * @param strength The collation strength as defined in UCA.
504
     * @param maxVariable Indicates that all characters in the specified group
505
     *     and earlier groups are treated as "noise" characters to be handled
506
     *     as defined by the alternate parameter. "space" | "punct" | "symbol".
507
     *     | "currency".
508
     * @param alternate Controls the handling of characters such as spaces and
509
     *     hyphens; specifically, the "noise" characters in the groups selected
510
     *     by the maxVariable parameter. "non-ignorable" | "shifted" |
511
     *     "blanked".
512
     * @param backwards indicates that the last accent in the string is the
513
     *     most significant.
514
     * @param normalization Indicates whether strings are converted to
515
     *     normalization form D.
516
     * @param caseLevel When used with primary strength, setting caseLevel has
517
     *     the effect of ignoring accents while taking account of case.
518
     * @param caseFirst Indicates whether upper-case precedes lower-case or
519
     *     vice versa.
520
     * @param numeric When numeric is specified, a sequence of consecutive
521
     *     digits is interpreted as a number, for example chap2 sorts before
522
     *     chap12.
523
     * @param reorder Determines the relative ordering of text in different
524
     *     scripts; for example the value digit,Grek,Latn indicates that
525
     *     digits precede Greek letters, which precede Latin letters.
526
     * @param decomposition The decomposition
527
     *
528
     * @return The collator of null if a Collator could not be retrieved
529
     *
530
     * @throws XPathException if an error occurs whilst getting the Collator
531
     */
532
    private static @Nullable Collator getCollationFromParams(
533
            final boolean fallback, @Nullable final String lang,
534
            @Nullable final String version, @Nullable final String strength,
535
            final String maxVariable, final String alternate,
536
            final boolean backwards, final boolean normalization,
537
            final boolean caseLevel, @Nullable final String caseFirst,
538
            final boolean numeric, @Nullable final String reorder,
539
            @Nullable final String decomposition) throws XPathException {
540
        return getCollationFromParams(fallback, lang, version, strength, maxVariable, alternate, backwards, normalization,
×
541
                                      caseLevel, caseFirst, numeric, reorder, decomposition, null);
×
542
    }
543

544
    /**
545
     * Get a Collator with the provided settings.
546
     *
547
     * @param fallback Determines whether the processor uses a fallback
548
     *     collation if a conformant collation is not available.
549
     * @param lang language code: a string in the lexical space of xs:language.
550
     * @param strength The collation strength as defined in UCA.
551
     * @param maxVariable Indicates that all characters in the specified group
552
     *     and earlier groups are treated as "noise" characters to be handled
553
     *     as defined by the alternate parameter. "space" | "punct" | "symbol".
554
     *     | "currency".
555
     * @param alternate Controls the handling of characters such as spaces and
556
     *     hyphens; specifically, the "noise" characters in the groups selected
557
     *     by the maxVariable parameter. "non-ignorable" | "shifted" |
558
     *     "blanked".
559
     * @param backwards indicates that the last accent in the string is the
560
     *     most significant.
561
     * @param normalization Indicates whether strings are converted to
562
     *     normalization form D.
563
     * @param caseLevel When used with primary strength, setting caseLevel has
564
     *     the effect of ignoring accents while taking account of case.
565
     * @param caseFirst Indicates whether upper-case precedes lower-case or
566
     *     vice versa.
567
     * @param numeric When numeric is specified, a sequence of consecutive
568
     *     digits is interpreted as a number, for example chap2 sorts before
569
     *     chap12.
570
     * @param reorder Determines the relative ordering of text in different
571
     *     scripts; for example the value digit,Grek,Latn indicates that
572
     *     digits precede Greek letters, which precede Latin letters.
573
     * @param decomposition The decomposition
574
     * @param expression the expression from which the collation derives
575
     *
576
     * @return The collator of null if a Collator could not be retrieved
577
     *
578
     * @throws XPathException if an error occurs whilst getting the Collator
579
     */
580
    private static @Nullable Collator getCollationFromParams(
581
            final boolean fallback, @Nullable final String lang,
582
            @Nullable final String version, @Nullable final String strength,
583
            final String maxVariable, final String alternate,
584
            final boolean backwards, final boolean normalization,
585
            final boolean caseLevel, @Nullable final String caseFirst,
586
            final boolean numeric, @Nullable final String reorder,
587
            @Nullable final String decomposition,
588
            @Nullable final Expression expression) throws XPathException {
589

590
        final Collator collator;
591
        if ("sme-SE".equals(lang)) {
1!
592
            try {
593
                collator = getSamiskCollator();
×
594
            } catch (final Exception pe) {
×
595
                logger.error(pe.getMessage(), pe);
×
596
                return null;
×
597
            }
598
        } else {
599
            final ULocale locale = getLocale(lang, expression);
1✔
600
            collator = Collator.getInstance(locale);
1✔
601
        }
602

603
        if(!fallback) {
1!
604
            //TODO(AR) how to disable fallback in ICU?
605
            logger.warn("Elemental does not yet support disabling collation fallback");
×
606
        }
607

608
        if(version != null) {
1!
609
            final VersionInfo versionInfo;
610
            try {
611
                versionInfo = VersionInfo.getInstance(version);
×
612
            } catch (final IllegalArgumentException iae) {
×
613
                logger.error(iae.getMessage(), iae);
×
614
                throw new XPathException(expression, iae.getMessage(), iae);
×
615
            }
616

617
            if(collator.getVersion().compareTo(versionInfo) < 0) {
×
618
                throw new XPathException(expression, "Requested UCA Collation version: " + version + ", however Elemental only has ICU UCA: " + collator.getVersion().toString());
×
619
            }
620
        }
621

622
        if (strength != null) {
1✔
623
            switch(strength) {
1!
624

625
                case "identical":
626
                    // the default setting
627
                    collator.setStrength(Collator.IDENTICAL);
×
628
                    break;
×
629

630
                case "1":
631
                case "primary":
632
                    collator.setStrength(Collator.PRIMARY);
1✔
633
                    break;
1✔
634

635
                case "2":
636
                case "secondary":
637
                    collator.setStrength(Collator.SECONDARY);
1✔
638
                    break;
1✔
639

640
                case "3":
641
                case "tertiary":
642
                    collator.setStrength(Collator.TERTIARY);
×
643
                    break;
×
644

645
                case "4":
646
                case "quaternary":
647
                    collator.setStrength(Collator.QUATERNARY);
×
648
                    break;
×
649

650
                default:
651
                    final String msg = "Elemental only supports Collation strengths of 'identical', 'primary', 'secondary', 'tertiary' or 'quaternary', requested: " + strength;
×
652
                    logger.error(msg);
×
653
                    throw new XPathException(expression, ErrorCodes.FOCH0002, msg);
×
654

655
            }
656
        }
657

658
        if(maxVariable != null) {
1!
659
            switch(maxVariable) {
1!
660
                case "space":
661
                    collator.setMaxVariable(Collator.ReorderCodes.SPACE);
×
662
                    break;
×
663

664
                case "punct":
665
                    collator.setMaxVariable(Collator.ReorderCodes.PUNCTUATION);
1✔
666
                    break;
1✔
667

668
                case "symbol":
669
                    collator.setMaxVariable(Collator.ReorderCodes.SYMBOL);
×
670
                    break;
×
671

672
                case "currency":
673
                    collator.setMaxVariable(Collator.ReorderCodes.CURRENCY);
×
674
                    break;
×
675

676
                default:
677
                    final String msg = "Elemental only supports Collation maxVariables of 'space', 'punct', 'symbol', or 'currency', requested: " + maxVariable;
×
678
                    logger.error(msg);
×
679
                    throw new XPathException(expression, ErrorCodes.FOCH0002, msg);
×
680
            }
681
        }
682

683
        if(alternate != null) {
1!
684
            switch(alternate) {
1!
685
                case "non-ignorable":
686
                    ((RuleBasedCollator)collator).setAlternateHandlingShifted(false);
1✔
687
                    break;
1✔
688

689
                case "shifted":
690
                case "blanked":
691
                    ((RuleBasedCollator)collator).setAlternateHandlingShifted(true);
×
692
                    break;
×
693

694
                default:
695
                    final String msg = "Collation alternate should be either 'non-ignorable', 'shifted' or 'blanked', but received: " + caseFirst;
×
696
                    logger.error(msg);
×
697
                    throw new XPathException(expression, ErrorCodes.FOCH0002, msg);
×
698
            }
699
        }
700

701
        if(backwards) {
1!
702
            ((RuleBasedCollator)collator).setFrenchCollation(true);
×
703
        }
704

705
        if(normalization) {
1!
706
            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
×
707
        } else {
×
708
            collator.setDecomposition(Collator.NO_DECOMPOSITION);
1✔
709
        }
710

711
        if(caseLevel && collator.getStrength() == Collator.PRIMARY) {
1!
712
            ((RuleBasedCollator)collator).setCaseLevel(true);
×
713
        }
714

715
        if(caseFirst != null) {
1!
716
            switch(caseFirst) {
×
717
                case "upper":
718
                    ((RuleBasedCollator)collator).setUpperCaseFirst(true);
×
719
                    break;
×
720

721
                case "lower":
722
                    ((RuleBasedCollator)collator).setLowerCaseFirst(true);
×
723
                    break;
×
724

725
                default:
726
                    final String msg = "Collation case first should be either 'upper' or 'lower', but received: " + caseFirst;
×
727
                    logger.error(msg);
×
728
                    throw new XPathException(expression, ErrorCodes.FOCH0002, msg);
×
729
            }
730
        }
731

732
        if(numeric) {
1!
733
            ((RuleBasedCollator)collator).setNumericCollation(true);
×
734
        }
735

736
        if(reorder != null) {
1!
737
            final String[] reorderCodes = reorder.split(",");
×
738
            final List<Integer> icuCollatorReorderCodes =
×
739
                    Arrays.stream(reorderCodes)
×
740
                    .map(Collations::toICUCollatorReorderCode)
×
741
                    .filter(i -> i > -1)
×
742
                    .collect(Collectors.toList());
×
743

744
            if(!icuCollatorReorderCodes.isEmpty()) {
×
745
                final int[] codes = new int[icuCollatorReorderCodes.size()];
×
746
                for(int i = 0; i < codes.length; i++) {
×
747
                    codes[i] = icuCollatorReorderCodes.get(i);
×
748
                }
749
                collator.setReorderCodes(codes);
×
750
            }
751
        }
752

753
        if (decomposition != null) {
1!
754
            switch(decomposition) {
×
755
                case "none":
756
                    collator.setDecomposition(Collator.NO_DECOMPOSITION);
×
757
                    break;
×
758

759
                case "full":
760
                    collator.setDecomposition(Collator.FULL_DECOMPOSITION);
×
761
                    break;
×
762

763
                case "standard":
764
                case "":
765
                    // the default setting
766
                    collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
×
767
                    break;
×
768

769
                default:
770
                    final String msg = "Collation decomposition should be either 'none', 'full' or 'standard', but received: " + decomposition;
×
771
                    logger.error(msg);
×
772
                    throw new XPathException(expression, ErrorCodes.FOCH0002, msg);
×
773
            }
774
        }
775

776
        return collator;
1✔
777
    }
778

779
    private static int toICUCollatorReorderCode(final String reorderCode) {
780
        return switch (reorderCode.toLowerCase()) {
×
781
            case "default" -> Collator.ReorderCodes.DEFAULT;
×
782
            case "none" -> Collator.ReorderCodes.NONE;
×
783
            case "others" -> Collator.ReorderCodes.OTHERS;
×
784
            case "space" -> Collator.ReorderCodes.SPACE;
×
785
            case "first" -> Collator.ReorderCodes.FIRST;
×
786
            case "punctuation" -> Collator.ReorderCodes.PUNCTUATION;
×
787
            case "symbol" -> Collator.ReorderCodes.SYMBOL;
×
788
            case "currency" -> Collator.ReorderCodes.CURRENCY;
×
789
            case "digit" -> Collator.ReorderCodes.DIGIT;
×
790
            default -> {
791
                logger.warn("Elemental does not support the collation reorderCode: {}", reorderCode);
×
792
                yield -1;
×
793
            }
794
        };
795
    }
796

797
    /**
798
     * Get a locale for the provided language.
799
     *
800
     * @param lang The language
801
     *
802
     * @return The locale
803
     */
804
    private static ULocale getLocale(@Nullable final String lang, @Nullable final Expression expression) throws XPathException {
805
        if(lang == null) {
1✔
806
            return ULocale.getDefault();
1✔
807
        } else {
808
            final String[] components = lang.split("-");
1✔
809
            return switch (components.length) {
1!
810
                case 3 -> new ULocale(components[0], components[1], components[2]);
×
811
                case 2 -> new ULocale(components[0], components[1]);
1✔
812
                case 1 -> new ULocale(components[0]);
1✔
813
                default -> throw new XPathException(expression, ErrorCodes.FOCH0002, "Unrecognized lang=" + lang);
×
814
            };
815
        }
816
    }
817

818
    private static Collator getSamiskCollator() throws Exception {
819
        Collator collator = samiskCollator.get();
×
820
        if (collator == null) {
×
821
            // Collation rules contained in a String object.
822
            // Codes for the representation of names of languages:
823
            // http://www.loc.gov/standards/iso639-2/englangn.html
824
            // UTF-8 characters from:
825
            // http://chouette.info/entities/table-utf8.php
826
            samiskCollator.compareAndSet(null,
×
827
                    new RuleBasedCollator("< a,A< \u00E1,\u00C1< b,B< c,C"
×
828
                            + "< \u010d,\u010c< d,D< \u0111,\u0110< e,E"
829
                            + "< f,F< g,G< h,H< i,I< j,J< k,K< l,L< m,M"
830
                            + "< n,N< \u014b,\u014a< o,O< p,P< r,R< s,S"
831
                            + "< \u0161,\u0160< t,T< \u0167,\u0166< u,U"
832
                            + "< v,V< z,Z< \u017e,\u017d").freeze());
×
833
            collator = samiskCollator.get();
×
834
        }
835

836
        return collator;
×
837
    }
838

839
    private static Collator getHtmlAsciiCaseInsensitiveCollator() throws Exception {
840
        Collator collator = htmlAsciiCaseInsensitiveCollator.get();
1✔
841
        if (collator == null) {
1!
842
            collator = new RuleBasedCollator("&a=A &b=B &c=C &d=D &e=E &f=F &g=G &h=H "
1✔
843
                    + "&i=I &j=J &k=K &l=L &m=M &n=N &o=O &p=P &q=Q &r=R &s=S &t=T "
844
                    + "&u=U &v=V &w=W &x=X &y=Y &z=Z");
845
            collator.setStrength(Collator.PRIMARY);
1✔
846
            htmlAsciiCaseInsensitiveCollator.compareAndSet(null,
1✔
847
                    collator.freeze());
1✔
848
            collator = htmlAsciiCaseInsensitiveCollator.get();
1✔
849
        }
850

851
        return collator;
1✔
852
    }
853

854
    private static Collator getXqtsAsciiCaseBlindCollator() throws Exception {
855
        Collator collator = xqtsAsciiCaseBlindCollator.get();
×
856
        if (collator == null) {
×
857
            collator = new RuleBasedCollator("&a=A &b=B &c=C &d=D &e=E &f=F &g=G &h=H "
×
858
                    + "&i=I &j=J &k=K &l=L &m=M &n=N &o=O &p=P &q=Q &r=R &s=S &t=T "
859
                    + "&u=U &v=V &w=W &x=X &y=Y &z=Z");
860
            collator.setStrength(Collator.PRIMARY);
×
861
            xqtsAsciiCaseBlindCollator.compareAndSet(null,
×
862
                    collator.freeze());
×
863
            collator = xqtsAsciiCaseBlindCollator.get();
×
864
        }
865

866
        return collator;
×
867
    }
868
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc