• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

oracle / opengrok / #3650

24 Oct 2023 03:07PM UTC coverage: 66.012% (-8.4%) from 74.444%
#3650

push

vladak
refactory repository history check

9 of 9 new or added lines in 1 file covered. (100.0%)

38668 of 58577 relevant lines covered (66.01%)

0.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.74
/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/CtagsReader.java
1
/*
2
 * CDDL HEADER START
3
 *
4
 * The contents of this file are subject to the terms of the
5
 * Common Development and Distribution License (the "License").
6
 * You may not use this file except in compliance with the License.
7
 *
8
 * See LICENSE.txt included in this distribution for the specific
9
 * language governing permissions and limitations under the License.
10
 *
11
 * When distributing Covered Code, include this CDDL HEADER in each
12
 * file and include the License file at LICENSE.txt.
13
 * If applicable, add the following below this CDDL HEADER, with the
14
 * fields enclosed by brackets "[]" replaced with your own identifying
15
 * information: Portions Copyright [yyyy] [name of copyright owner]
16
 *
17
 * CDDL HEADER END
18
 */
19

20
/*
21
 * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
22
 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
23
 */
24
package org.opengrok.indexer.analysis;
25

26
import java.util.EnumMap;
27
import java.util.function.Supplier;
28
import java.util.logging.Level;
29
import java.util.logging.Logger;
30
import java.util.regex.Matcher;
31
import java.util.regex.Pattern;
32
import org.opengrok.indexer.logger.LoggerFactory;
33
import org.opengrok.indexer.util.SourceSplitter;
34

35
/**
36
 * Represents a reader of output from runs of ctags.
37
 */
38
public class CtagsReader {
1✔
39

40
    /**
41
     * Matches the Unicode word that occurs last in a string, ignoring any
42
     * trailing whitespace or non-word characters, and makes it accessible as
43
     * the first capture, {@code match.group(1)}.
44
     */
45
    private static final Pattern LAST_UNICODE_WORD = Pattern.compile("(?U)(\\w+)[\\W\\s]*$");
1✔
46

47
    /**
48
     * Matches a Unicode word character.
49
     */
50
    private static final Pattern WORD_CHAR = Pattern.compile("(?U)\\w");
1✔
51

52
    private static final Logger LOGGER = LoggerFactory.getLogger(CtagsReader.class);
1✔
53

54
    /** A value indicating empty method body in tags, so skip it. */
55
    private static final int MIN_METHOD_LINE_LENGTH = 6;
56

57
    /**
58
     * 96 is used by universal ctags for some lines, but it's too low,
59
     * OpenGrok can theoretically handle 50000 with 8G heap. Also, this might
60
     * break scopes functionality, if set too low.
61
     */
62
    private static final int MAX_METHOD_LINE_LENGTH = 1030;
63

64
    private static final int MAX_CUT_LENGTH = 2000;
65

66
    /**
67
     * E.g. krb5 src/kdc/kdc_authdata.c has a signature for handle_authdata()
68
     * split across twelve lines, so use double that number.
69
     */
70
    private static final int MAX_CUT_LINES = 24;
71

72
    private final EnumMap<tagFields, String> fields = new EnumMap<>(
1✔
73
        tagFields.class);
74

75
    private final Definitions defs = new Definitions();
1✔
76

77
    private Supplier<SourceSplitter> splitterSupplier;
78
    private boolean triedSplitterSupplier;
79
    private SourceSplitter splitter;
80
    private long cutCacheKey;
81
    private String cutCacheValue;
82

83
    private int tabSize;
84

85
    /**
86
     * This should mimic
87
     * https://github.com/universal-ctags/ctags/blob/master/docs/format.rst or
88
     * http://ctags.sourceforge.net/FORMAT (for backwards compatibility).
89
     * Uncomment only those that are used ... (to avoid populating the hashmap
90
     * for every record).
91
     */
92
    public enum tagFields {
1✔
93
//        ARITY("arity"),
94
        CLASS("class"),
1✔
95
        //        INHERIT("inherit"), //this is not defined in above format docs, but both universal and exuberant ctags use it
96
        //        INTERFACE("interface"), //this is not defined in above format docs, but both universal and exuberant ctags use it
97
        //        ENUM("enum"),
98
        //        FILE("file"),
99
        //        FUNCTION("function"),
100
        //        KIND("kind"),
101
        LINE("line"),
1✔
102
        //        NAMESPACE("namespace"), //this is not defined in above format docs, but both universal and exuberant ctags use it
103
        //        PROGRAM("program"), //this is not defined in above format docs, but both universal and exuberant ctags use it
104
        SIGNATURE("signature");
1✔
105
//        STRUCT("struct"),
106
//        TYPEREF("typeref"),
107
//        UNION("union");
108

109
        //NOTE: if you edit above, always consult below charCmpEndOffset
110
        private final String name;
111

112
        /**
113
         * Sets {@code this.name} to {@code name}.
114
         * @param name the assignment value
115
         */
116
        tagFields(String name) {
1✔
117
            this.name = name;
1✔
118
        }
1✔
119

120
        /**
121
         * N.b. make this MAX. 8 chars! (backwards compat to DOS/Win).
122
         * 1 - means only 2 first chars are compared.
123
         * <p>This is very important, we only compare that amount of chars from
124
         * field types with input to save time. This number has to be long
125
         * enough to get rid of disambiguation.
126
         * <p>TODO:
127
         * <p>NOTE this is a big tradeoff in terms of input data, e.g. field
128
         * "find" will be considered "file" and overwrite the value, so if
129
         * ctags will send us buggy input, we will output buggy data TOO!
130
         * NO VALIDATION happens of input - but then we gain LOTS of speed, due to
131
         * not comparing the same field names again and again fully.
132
         */
133
        private static final int CHAR_CMP_END_OFFSET = 0;
134

135
        /**
136
         * Quickly get if the field name matches allowed/consumed ones.
137
         * @param fullName the name to look up
138
         * @return a defined value, or null if unmatched
139
         */
140
        public static CtagsReader.tagFields quickValueOf(String fullName) {
141
            for (tagFields x : tagFields.values()) {
1✔
142
                boolean match = true;
1✔
143
                for (int i = 0; i <= CHAR_CMP_END_OFFSET; i++) {
1✔
144
                    if (x.name.charAt(i) != fullName.charAt(i)) {
1✔
145
                        match = false;
1✔
146
                        break;
1✔
147
                    }
148
                }
149
                if (match) {
1✔
150
                    return x;
1✔
151
                }
152
            }
153
            return null;
1✔
154
        }
155
    }
156

157
    public int getTabSize() {
158
        return tabSize;
×
159
    }
160

161
    public void setTabSize(int tabSize) {
162
        this.tabSize = tabSize;
1✔
163
    }
1✔
164

165
    /**
166
     * Gets the instance's definitions.
167
     * @return a defined instance
168
     */
169
    public Definitions getDefinitions() {
170
        return defs;
1✔
171
    }
172

173
    /**
174
     * Sets the supplier of a {@link SourceSplitter} to use when ctags pattern
175
     * is insufficient, and the reader could use the source data.
176
     * <p>
177
     * N.b. because an I/O exception can occur, the supplier may return
178
     * {@code null}, which the {@link CtagsReader} handles.
179
     * @param obj defined instance or {@code null}
180
     */
181
    public void setSplitterSupplier(Supplier<SourceSplitter> obj) {
182
        splitter = null;
1✔
183
        triedSplitterSupplier = false;
1✔
184
        splitterSupplier = obj;
1✔
185
    }
1✔
186

187
    /**
188
     * Reads a line into the instance's definitions.
189
     * @param tagLine a defined line or null to no-op
190
     */
191
    public void readLine(String tagLine) {
192
        if (tagLine == null) {
1✔
193
            return;
×
194
        }
195

196
        int p = tagLine.indexOf('\t');
1✔
197
        if (p <= 0) {
1✔
198
            return;
×
199
        }
200
        String def = tagLine.substring(0, p);
1✔
201
        int mstart = tagLine.indexOf('\t', p + 1);
1✔
202

203
        String kind = null;
1✔
204

205
        int lp = tagLine.length();
1✔
206
        while ((p = tagLine.lastIndexOf('\t', lp - 1)) > 0) {
1✔
207
            String fld = tagLine.substring(p + 1, lp);
1✔
208
            lp = p;
1✔
209

210
            int sep = fld.indexOf(':');
1✔
211
            if (sep != -1) {
1✔
212
                tagFields pos = tagFields.quickValueOf(fld);
1✔
213
                if (pos != null) {
1✔
214
                    String val = fld.substring(sep + 1);
1✔
215
                    fields.put(pos, val);
1✔
216
                }
217
            } else {
1✔
218
                //TODO no separator, assume this is the kind
219
                kind = fld;
1✔
220
                break;
1✔
221
            }
222
        }
1✔
223

224
        String lnum = fields.get(tagFields.LINE);
1✔
225
        String signature = fields.get(tagFields.SIGNATURE);
1✔
226
        String classInher = fields.get(tagFields.CLASS);
1✔
227

228
        final String whole;
229
        final String match;
230
        int mlength = p - mstart;
1✔
231
        if ((p > 0) && (mlength > MIN_METHOD_LINE_LENGTH)) {
1✔
232
            whole = cutPattern(tagLine, mstart, p);
1✔
233
            if (mlength < MAX_METHOD_LINE_LENGTH) {
1✔
234
                match = whole.replaceAll("[ \t]+", " ");
1✔
235
                //TODO per format we should also recognize \r and \n
236
            } else {
237
                LOGGER.log(Level.FINEST, "Ctags: stripping method" +
×
238
                    " body for def {0} line {1}(scopes/highlight" +
239
                    " might break)", new Object[]{def, lnum});
240
                match = whole.substring(0, MAX_METHOD_LINE_LENGTH).replaceAll("[ \t]+", " ");
×
241
            }
242
        } else { // tag is in wrong format; cannot extract tagaddress from it; skip
243
            return;
1✔
244
        }
245

246
        // Bug #809: Keep track of which symbols have already been
247
        // seen to prevent duplicating them in memory.
248

249
        final String type = classInher == null ? kind : kind + " in " + classInher;
1✔
250

251
        int lineno;
252
        try {
253
            lineno = Integer.parseUnsignedInt(lnum);
1✔
254
        } catch (NumberFormatException e) {
×
255
            lineno = 0;
×
256
            LOGGER.log(Level.WARNING, "CTags line number parsing problem(but" +
×
257
                " will continue with line # 0) for symbol {0}", def);
258
        }
1✔
259

260
        CpatIndex cidx = bestIndexOfTag(lineno, whole, def);
1✔
261
        addTag(defs, cidx.lineno, def, type, match, classInher, signature,
1✔
262
            cidx.lineStart, cidx.lineEnd);
263

264
        String[] args;
265
        if (signature != null && !signature.equals("()") &&
1✔
266
                !signature.startsWith("() ") && (args =
1✔
267
                splitSignature(signature)) != null) {
1✔
268
            for (String arg : args) {
1✔
269
                //TODO this algorithm assumes that data types occur to
270
                //     the left of the argument name, so it will not
271
                //     work for languages like rust, kotlin, etc. which
272
                //     place the data type to the right of the argument name.
273
                //     Need an attribute from ctags to indicate data type
274
                //     location.
275
                // ------------------------------------------------------------
276
                // When no assignment of default values,
277
                // expecting: <type> <name>, or <name>
278
                //
279
                // When default value assignment applied to parameter,
280
                // expecting: <type> <name> = <value> or
281
                //            <name> = <value>
282
                // (Note whitespace content made irrelevant)
283

284
                // Need to ditch the default assignment value
285
                // so that the extraction loop below will work.
286
                // This assumes all languages use '=' to assign value.
287

288
                if (arg.contains("=")) {
1✔
289
                    String[] a = arg.split("=");
×
290
                    arg = a[0];  // throws away assigned value
×
291
                }
292
                arg = arg.trim();
1✔
293
                if (arg.length() < 1) {
1✔
294
                    continue;
×
295
                }
296

297
                cidx = bestIndexOfArg(lineno, whole, arg);
1✔
298

299
                String name = null;
1✔
300
                Matcher mname = LAST_UNICODE_WORD.matcher(arg);
1✔
301
                if (mname.find()) {
1✔
302
                    name = mname.group(1);
1✔
303
                } else if (arg.equals("...")) {
1✔
304
                    name = arg;
×
305
                }
306
                if (name != null) {
1✔
307
                    addTag(defs, cidx.lineno, name, "argument", def.trim() +
1✔
308
                        signature.trim(), null, signature, cidx.lineStart,
1✔
309
                        cidx.lineEnd);
310
                } else {
311
                    if (LOGGER.isLoggable(Level.FINEST)) {
1✔
312
                        LOGGER.log(Level.FINEST,
×
313
                            "Not matched arg:{0}|sig:{1}",
314
                            new Object[]{arg, signature});
315
                    }
316
                }
317
            }
318
        }
319

320
        fields.clear();
1✔
321
    }
1✔
322

323
    /**
324
     * Cuts the ctags TAG FILE FORMAT search pattern from the specified
325
     * {@code tagLine} between the specified tab positions, and un-escapes
326
     * {@code \\} and {@code \/}.
327
     * @return a defined string
328
     */
329
    private static String cutPattern(String tagLine, int startTab, int endTab) {
330
        // Three lead character represents "\t/^".
331
        String cut = tagLine.substring(startTab + 3, endTab);
1✔
332

333
        /*
334
         * Formerly this class cut four characters from the end, but my testing
335
         * revealed a bug for short lines in files with macOS endings (e.g.
336
         * cyrus-sasl mac/libdes/src/des_enc.c) where the pattern-ending $ is
337
         * not present. Now, inspect the end of the pattern to determine the
338
         * true cut -- which is appropriate for all content anyway.
339
         */
340
        if (cut.endsWith("$/;\"")) {
1✔
341
            cut = cut.substring(0, cut.length() - 4);
1✔
342
        } else if (cut.endsWith("/;\"")) {
1✔
343
            cut = cut.substring(0, cut.length() - 3);
1✔
344
        } else {
345
            /*
346
             * The former logic did the following without the inspections above.
347
             * Leaving this here as a fallback.
348
             */
349
            cut = cut.substring(0, cut.length() - 4);
×
350
        }
351
        return cut.replace("\\\\", "\\").replace("\\/", "/");
1✔
352
    }
353

354
    /**
355
     * Adds a tag to a {@code Definitions} instance.
356
     */
357
    private void addTag(Definitions defs, int lineno, String symbol,
358
            String type, String text, String namespace, String signature,
359
            int lineStart, int lineEnd) {
360
        // The strings are frequently repeated (a symbol can be used in
361
        // multiple definitions, multiple definitions can have the same type,
362
        // one line can contain multiple definitions). Intern them to minimize
363
        // the space consumed by them (see bug #809).
364
        defs.addTag(lineno, symbol.trim().intern(), type.trim().intern(),
1✔
365
            text.trim().intern(), namespace == null ? null :
1✔
366
            namespace.trim().intern(), signature, lineStart, lineEnd);
1✔
367
    }
1✔
368

369
    /**
370
     * Searches for the index of the best match of {@code str} in {@code whole}
371
     * in a multi-stage algorithm that first starts strictly to disfavor
372
     * abutting words and then relaxes -- and also works around ctags's possibly
373
     * having returned a partial line or only one line of a multi-line language
374
     * syntax.
375
     * @return a defined instance
376
     */
377
    private CpatIndex bestIndexOfTag(int lineno, String whole, String str) {
378
        if (whole.length() < 1) {
1✔
379
            return new CpatIndex(lineno, 0, 1, true);
×
380
        }
381
        String origWhole = whole;
1✔
382

383
        int t = tabSize;
1✔
384
        int s, e;
385

386
        int woff = strictIndexOf(whole, str);
1✔
387
        if (woff < 0) {
1✔
388
            /*
389
             * When a splitter is available, search the entire line.
390
             * (N.b. use 0-based indexing vs ctags's 1-based.)
391
             */
392
            String cut = trySplitterCut(lineno - 1, 1);
1✔
393
            if (cut == null || !cut.startsWith(whole)) {
1✔
394
                if (LOGGER.isLoggable(Level.FINE)) {
1✔
395
                    String readablecut = cut != null ? cut : "null\n";
×
396
                    LOGGER.log(Level.FINE,
×
397
                        "Bad cut:{0}|versus:{1}|line {2}",
398
                        new Object[]{readablecut, whole, lineno});
×
399
                }
×
400
            } else {
401
                whole = cut;
1✔
402
                woff = strictIndexOf(whole, str);
1✔
403
            }
404

405
            if (woff < 0) {
1✔
406
                /* At this point, do a lax search of the substring. */
407
                woff = whole.indexOf(str);
1✔
408
            }
409
        }
410

411
        if (woff >= 0) {
1✔
412
            s = ExpandTabsReader.translate(whole, woff, t);
1✔
413
            e = ExpandTabsReader.translate(whole, woff + str.length(), t);
1✔
414
            return new CpatIndex(lineno, s, e);
1✔
415
        }
416
        /*
417
         * When ctags has truncated a pattern, or when it spans multiple lines,
418
         * then `str' might not be found in `whole'. In that case, return an
419
         * imprecise index for the last character as the best we can do.
420
         */
421
        s = ExpandTabsReader.translate(origWhole, origWhole.length() - 1, t);
1✔
422
        e = ExpandTabsReader.translate(origWhole, origWhole.length(), t);
1✔
423
        return new CpatIndex(lineno, s, e, true);
1✔
424
    }
425

426
    /**
427
     * Searches for the index of the best match of {@code arg} in {@code whole}
428
     * in a multi-stage algorithm that first starts strictly to disfavor
429
     * abutting words and then relaxes -- and also works around ctags's possibly
430
     * having returned a partial line or only one line of a multi-line language
431
     * syntax or where ctags has transformed syntax.
432
     * <p>
433
     * E.g., the true source might read {@code const fru_regdef_t *d} with the
434
     * ctags signature reading {@code const fru_regdef_t * d}
435
     * @return a defined instance
436
     */
437
    private CpatIndex bestIndexOfArg(int lineno, String whole, String arg) {
438
        if (whole.length() < 1) {
1✔
439
            return new CpatIndex(lineno, 0, 1, true);
×
440
        }
441

442
        int t = tabSize;
1✔
443
        int s, e;
444

445
        // First search arg as-is in the current `whole' -- strict then lax.
446
        int woff = strictIndexOf(whole, arg);
1✔
447
        if (woff < 0) {
1✔
448
            woff = whole.indexOf(arg);
1✔
449
        }
450
        if (woff >= 0) {
1✔
451
            s = ExpandTabsReader.translate(whole, woff, t);
1✔
452
            e = ExpandTabsReader.translate(whole, woff + arg.length(), t);
1✔
453
            return new CpatIndex(lineno, s, e);
1✔
454
        }
455

456
        // Build a pattern from `arg' with looseness around whitespace.
457
        StringBuilder bld = new StringBuilder();
1✔
458
        int spos = 0;
1✔
459
        boolean lastWhitespace = false;
1✔
460
        boolean firstNonWhitespace = false;
1✔
461
        for (int i = 0; i < arg.length(); ++i) {
1✔
462
            char c = arg.charAt(i);
1✔
463
            if (Character.isWhitespace(c)) {
1✔
464
                if (!firstNonWhitespace) {
1✔
465
                    ++spos;
×
466
                } else if (!lastWhitespace) {
1✔
467
                    lastWhitespace = true;
1✔
468
                    if (spos < i) {
1✔
469
                        bld.append(Pattern.quote(arg.substring(spos, i)));
1✔
470
                    }
471
                    // m`\s*`
472
                    bld.append("\\s*");
1✔
473
                }
474
            } else {
475
                firstNonWhitespace = true;
1✔
476
                if (lastWhitespace) {
1✔
477
                    lastWhitespace = false;
1✔
478
                    spos = i;
1✔
479
                }
480
            }
481
        }
482
        if (spos < arg.length()) {
1✔
483
            bld.append(Pattern.quote(arg.substring(spos)));
1✔
484
        }
485
        if (bld.length() < 1) {
1✔
486
            if (LOGGER.isLoggable(Level.FINE)) {
×
487
                LOGGER.log(Level.FINE, "Odd arg:{0}|versus:{1}|line {2}",
×
488
                    new Object[]{arg, whole, lineno});
×
489
            }
490
            /*
491
             * When no fuzzy match can be generated, return an imprecise index
492
             * for the first character as the best we can do.
493
             */
494
            return new CpatIndex(lineno, 0, 1, true);
×
495
        }
496

497
        Pattern argpat = Pattern.compile(bld.toString());
1✔
498
        PatResult pr = bestMatch(whole, arg, argpat);
1✔
499
        if (pr.start >= 0) {
1✔
500
            s = ExpandTabsReader.translate(whole, pr.start, t);
1✔
501
            e = ExpandTabsReader.translate(whole, pr.end, t);
1✔
502
            return new CpatIndex(lineno, s, e);
1✔
503
        }
504

505
        /*
506
         * When a splitter is available, search the next several lines.
507
         * (N.b. use 0-based indexing vs ctags's 1-based.)
508
         */
509
        String cut = trySplitterCut(lineno - 1, MAX_CUT_LINES);
1✔
510
        if (cut == null || !cut.startsWith(whole)) {
1✔
511
            if (LOGGER.isLoggable(Level.FINE)) {
1✔
512
                String readablecut = cut != null ? cut : "null\n";
×
513
                LOGGER.log(Level.FINE, "Bad cut:{0}|versus:{1}|line {2}",
×
514
                    new Object[]{readablecut, whole, lineno});
×
515
            }
×
516
        } else {
517
            pr = bestMatch(cut, arg, argpat);
1✔
518
            if (pr.start >= 0) {
1✔
519
                return bestLineOfMatch(lineno, pr, cut);
1✔
520
            }
521
        }
522

523
        /*
524
         * When no match is found, return an imprecise index for the last
525
         * character as the best we can do.
526
         */
527
        s = ExpandTabsReader.translate(whole, whole.length() - 1, t);
1✔
528
        e = ExpandTabsReader.translate(whole, whole.length(), t);
1✔
529
        return new CpatIndex(lineno, s, e, true);
1✔
530
    }
531

532
    /**
533
     * Searches strictly then laxly.
534
     */
535
    private PatResult bestMatch(String whole, String arg, Pattern argpat) {
536
        PatResult m = strictMatch(whole, arg, argpat);
1✔
537
        if (m.start >= 0) {
1✔
538
            return m;
1✔
539
        }
540
        Matcher marg = argpat.matcher(whole);
1✔
541
        if (marg.find()) {
1✔
542
            return new PatResult(marg.start(), marg.end(), marg.group());
1✔
543
        }
544
        // Return m, which was invalid if we got to here.
545
        return m;
1✔
546
    }
547

548
    /**
549
     * Like {@link String#indexOf(java.lang.String)} but strict that a
550
     * {@code substr} starting with a word character cannot abut another word
551
     * character on its left and likewise on the right for a {@code substr}
552
     * ending with a word character.
553
     */
554
    private int strictIndexOf(String whole, String substr) {
555
        boolean strictLeft = substr.length() > 0 && WORD_CHAR.matcher(
1✔
556
            String.valueOf(substr.charAt(0))).matches();
1✔
557
        boolean strictRight = substr.length() > 0 && WORD_CHAR.matcher(
1✔
558
            String.valueOf(substr.charAt(substr.length() - 1))).matches();
1✔
559

560
        int spos = 0;
1✔
561
        do {
562
            int woff = whole.indexOf(substr, spos);
1✔
563
            if (woff < 0) {
1✔
564
                return -1;
1✔
565
            }
566

567
            spos = woff + 1;
1✔
568
            String onechar;
569
            /*
570
             * Reject if the previous character is a word character, as that
571
             * would not accord with a clean symbol break
572
             */
573
            if (strictLeft && woff > 0) {
1✔
574
                onechar = String.valueOf(whole.charAt(woff - 1));
1✔
575
                if (WORD_CHAR.matcher(onechar).matches()) {
1✔
576
                    continue;
1✔
577
                }
578
            }
579
            /*
580
             * Reject if the following character is a word character, as that
581
             * would not accord with a clean symbol break
582
             */
583
            if (strictRight && woff + substr.length() < whole.length()) {
1✔
584
                onechar = String.valueOf(whole.charAt(woff + substr.length()));
1✔
585
                if (WORD_CHAR.matcher(onechar).matches()) {
1✔
586
                    continue;
1✔
587
                }
588
            }
589
            return woff;
1✔
590
        } while (spos < whole.length());
1✔
591
        return -1;
×
592
    }
593

594
    /**
595
     * Like {@link #strictIndexOf(java.lang.String, java.lang.String)} but using
596
     * a pattern.
597
     */
598
    private PatResult strictMatch(String whole, String substr, Pattern pat) {
599
        boolean strictLeft = substr.length() > 0 && WORD_CHAR.matcher(
1✔
600
            String.valueOf(substr.charAt(0))).matches();
1✔
601
        boolean strictRight = substr.length() > 0 && WORD_CHAR.matcher(
1✔
602
            String.valueOf(substr.charAt(substr.length() - 1))).matches();
1✔
603

604
        Matcher m = pat.matcher(whole);
1✔
605
        while (m.find()) {
1✔
606
            String onechar;
607
            /*
608
             * Reject if the previous character is a word character, as that
609
             * would not accord with a clean symbol break
610
             */
611
            if (strictLeft && m.start() > 0) {
1✔
612
                onechar = String.valueOf(whole.charAt(m.start() - 1));
1✔
613
                if (WORD_CHAR.matcher(onechar).matches()) {
1✔
614
                    continue;
×
615
                }
616
            }
617
            /*
618
             * Reject if the following character is a word character, as that
619
             * would not accord with a clean symbol break
620
             */
621
            if (strictRight && m.end() < whole.length()) {
1✔
622
                onechar = String.valueOf(whole.charAt(m.end()));
1✔
623
                if (WORD_CHAR.matcher(onechar).matches()) {
1✔
624
                    continue;
1✔
625
                }
626
            }
627
            return new PatResult(m.start(), m.end(), m.group());
1✔
628
        }
629
        return new PatResult(-1, -1, null);
1✔
630
    }
631

632
    /**
633
     * Finds the line with the longest content from {@code cut}.
634
     * <p>
635
     * The {@link Definitions} tag model is based on a match within a line.
636
     * "signature" fields, however, can be condensed from multiple lines; and a
637
     * fuzzy match can therefore span multiple lines.
638
     */
639
    private CpatIndex bestLineOfMatch(int lineno, PatResult pr, String cut) {
640
        // (N.b. use 0-based indexing vs ctags's 1-based.)
641
        int lineOff = splitter.getOffset(lineno - 1);
1✔
642
        int mOff = lineOff + pr.start;
1✔
643
        int mIndex = splitter.findLineIndex(mOff);
1✔
644
        int zOff = lineOff + pr.end - 1;
1✔
645
        int zIndex = splitter.findLineIndex(zOff);
1✔
646

647
        int t = tabSize;
1✔
648
        int resIndex = mIndex;
1✔
649
        int contentLength = 0;
1✔
650
        /*
651
         * Initialize the following just to silence warnings but with values
652
         * that will be detected as "bad fuzzy" later.
653
         */
654
        String whole = "";
1✔
655
        int s = 0;
1✔
656
        int e = 1;
1✔
657
        /*
658
         * Iterate to determine the length of the portion of cut that is
659
         * contained within each line.
660
         */
661
        for (int lIndex = mIndex; lIndex <= zIndex; ++lIndex) {
1✔
662
            String iwhole = splitter.getLine(lIndex);
1✔
663
            int lOff = splitter.getOffset(lIndex);
1✔
664
            int lOffZ = lOff + iwhole.length();
1✔
665
            int offStart = Math.max(pr.start + lineOff, lOff);
1✔
666
            int offEnd = Math.min(pr.end + lineOff, lOffZ);
1✔
667
            if (offEnd - offStart > contentLength) {
1✔
668
                contentLength = offEnd - offStart;
1✔
669
                resIndex = lIndex;
1✔
670
                whole = iwhole;
1✔
671
                // (The following are not yet adjusted for tabs.)
672
                s = offStart - lOff;
1✔
673
                e = offEnd - lOff;
1✔
674
            }
675
        }
676

677
        if (s >= 0 && s < whole.length() && e >= 0 && e <= whole.length()) {
1✔
678
            s = ExpandTabsReader.translate(whole, s, t);
1✔
679
            e = ExpandTabsReader.translate(whole, e, t);
1✔
680
            // (N.b. use ctags's 1-based indexing.)
681
            return new CpatIndex(resIndex + 1, s, e);
1✔
682
        }
683

684
        /*
685
         * This should not happen -- but if it does, log it and return an
686
         * imprecise index for the first character as the best we can do.
687
         */
688
        if (LOGGER.isLoggable(Level.FINE)) {
×
689
            LOGGER.log(Level.FINE,
×
690
                "Bad fuzzy:{0}|versus:{1}|line {2} pos {3}-{4}|{5}|",
691
                new Object[]{pr.capture, cut, lineno, s, e, whole});
×
692
        }
693
        return new CpatIndex(lineno, 0, 1, true);
×
694
    }
695

696
    /**
697
     * TODO if some languages use different character for separating arguments,
698
     * below needs to be adjusted.
699
     * @return a defined instance or null
700
     */
701
    private static String[] splitSignature(String signature) {
702
        int off0 = 0;
1✔
703
        int offz = signature.length();
1✔
704
        int soff = off0;
1✔
705
        int eoff = offz;
1✔
706
        if (soff >= eoff) {
1✔
707
            return null;
×
708
        }
709

710
        // Trim outer punctuation if it exists.
711
        while (soff < signature.length() && (signature.charAt(soff) == '(' ||
1✔
712
                signature.charAt(soff) == '{')) {
1✔
713
            ++soff;
1✔
714
        }
715
        while (eoff - 1 > soff && (signature.charAt(eoff - 1) == ')' ||
1✔
716
                signature.charAt(eoff - 1) == '}')) {
1✔
717
            --eoff;
1✔
718
        }
719
        if (soff > off0 || eoff < offz) {
1✔
720
            signature = signature.substring(soff, eoff);
1✔
721
        }
722
        return signature.split(",");
1✔
723
    }
724

725
    /**
726
     * Tries to cut lines from a splitter provided by {@code splitterSupplier}.
727
     * @return a defined instance if a successful cut is made or else
728
     * {@code null}
729
     */
730
    private String trySplitterCut(int lineOffset, int maxLines) {
731
        if (splitter == null) {
1✔
732
            if (splitterSupplier == null || triedSplitterSupplier) {
1✔
733
                return null;
1✔
734
            }
735
            triedSplitterSupplier = true;
1✔
736
            splitter = splitterSupplier.get();
1✔
737
            if (splitter == null) {
1✔
738
                return null;
×
739
            }
740
        }
741

742
        long newCutCacheKey = ((long) lineOffset << 32) | maxLines;
1✔
743
        if (cutCacheKey == newCutCacheKey) {
1✔
744
            return cutCacheValue;
1✔
745
        }
746

747
        StringBuilder cutbld = new StringBuilder();
1✔
748
        for (int i = lineOffset; i < lineOffset + maxLines &&
1✔
749
                i < splitter.count() && cutbld.length() < MAX_CUT_LENGTH;
1✔
750
                ++i) {
1✔
751
            cutbld.append(splitter.getLine(i));
1✔
752
        }
753
        if (cutbld.length() > MAX_CUT_LENGTH) {
1✔
754
            cutbld.setLength(MAX_CUT_LENGTH);
×
755
        }
756
        cutCacheValue = cutbld.toString();
1✔
757
        cutCacheKey = newCutCacheKey;
1✔
758
        return cutCacheValue;
1✔
759
    }
760

761
    /**
762
     * Represents an index into ctags pattern entries.
763
     */
764
    private static class CpatIndex {
765
        public final int lineno;
766
        public final int lineStart;
767
        public final int lineEnd;
768
        public final boolean imprecise;
769

770
        CpatIndex(int lineno, int lineStart, int lineEnd) {
1✔
771
            this.lineno = lineno;
1✔
772
            this.lineStart = lineStart;
1✔
773
            this.lineEnd = lineEnd;
1✔
774
            this.imprecise = false;
1✔
775
        }
1✔
776

777
        CpatIndex(int lineno, int lineStart, int lineEnd, boolean imprecise) {
1✔
778
            this.lineno = lineno;
1✔
779
            this.lineStart = lineStart;
1✔
780
            this.lineEnd = lineEnd;
1✔
781
            this.imprecise = imprecise;
1✔
782
        }
1✔
783
    }
784

785
    /**
786
     * Represents a result from a pattern match -- valid if lineStart is greater
787
     * than or equal to zero.
788
     */
789
    private static class PatResult {
790
        public final int start;
791
        public final int end;
792
        public final String capture;
793

794
        PatResult(int start, int end, String capture) {
1✔
795
            this.start = start;
1✔
796
            this.end = end;
1✔
797
            this.capture = capture;
1✔
798
        }
1✔
799
    }
800
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc