• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

kermitt2 / grobid / 390

pending completion
390

push

circleci

more log debug; model update

2 of 2 new or added lines in 2 files covered. (100.0%)

14847 of 37498 relevant lines covered (39.59%)

0.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

68.06
/grobid-core/src/main/java/org/grobid/core/utilities/matching/ReferenceMarkerMatcher.java
1
package org.grobid.core.utilities.matching;
2

3
import com.google.common.base.Function;
4
import com.google.common.base.Predicate;
5
import com.google.common.collect.Iterables;
6
import com.google.common.collect.Lists;
7
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
8
import org.apache.lucene.util.Version;
9
import org.grobid.core.data.BibDataSet;
10
import org.grobid.core.data.BiblioItem;;
11
import org.grobid.core.layout.LayoutToken;
12
import org.grobid.core.utilities.LayoutTokensUtil;
13
import org.grobid.core.utilities.Pair;
14
import org.grobid.core.utilities.TextUtilities;
15
import org.grobid.core.utilities.counters.CntManager;
16
import org.grobid.core.engines.counters.ReferenceMarkerMatcherCounters;
17
import org.slf4j.Logger;
18
import org.slf4j.LoggerFactory;
19

20
import java.util.ArrayList;
21
import java.util.Collections;
22
import java.util.List;
23
import java.util.Set;
24
import java.util.HashSet;
25
import java.util.regex.Matcher;
26
import java.util.regex.Pattern;
27

28
/**
29
 * Matching reference markers to extracted citations
30
 */
31
public class ReferenceMarkerMatcher {
32
    /** Logger for this class. */
    private static final Logger LOGGER = LoggerFactory.getLogger(ReferenceMarkerMatcher.class);
1✔
33

34
    /**
     * Four digits starting with 1 or 2, optionally followed by one letter a-d (e.g. "1995", "2004b").
     * There is no boundary check, so it can also match inside a longer run of digits.
     */
    public static final Pattern YEAR_PATTERN = Pattern.compile("[12][0-9]{3}[a-d]?");
1✔
35
    /**
     * Same year shape as {@link #YEAR_PATTERN}, but the four digits must be neither preceded
     * nor directly followed by another digit.
     */
    public static final Pattern YEAR_PATTERN_WITH_LOOK_AROUND = Pattern.compile("(?<!\\d)[12][0-9]{3}(?!\\d)[a-d]?");
1✔
36
    // Previous ASCII-only version, kept for reference:
    //public static final Pattern AUTHOR_NAME_PATTERN = Pattern.compile("[A-Z][A-Za-z]+");
37
    /** An uppercase ASCII letter followed by one or more Unicode letters. */
    public static final Pattern AUTHOR_NAME_PATTERN = Pattern.compile("[A-Z][\\p{L}]+");
1✔
38
    //public static final Pattern NUMBERED_CITATION_PATTERN = Pattern.compile(" *[\\(\\[]? *(?:\\d+[-–]\\d+,|\\d+, *)*[ ]*(?:\\d+[-–]\\d+|\\d+)[\\)\\]]? *");
39
    public static final Pattern NUMBERED_CITATION_PATTERN = Pattern.compile("[\\(\\[]?\\s*(?:\\d+[-−–]\\d+,|\\d+,[ ]*)*[ ]*(?:\\d+[-–]\\d+|\\d+)\\s*[\\)\\]]?");
1✔
40
    /** Separator between individual author-style citations inside one callout (a semicolon). */
    public static final Pattern AUTHOR_SEPARATOR_PATTERN = Pattern.compile(";");
1✔
41
    /** Lucene analyzer used to tokenize the two bounds of a numeric range before parsing them. */
    public static final ClassicAnalyzer ANALYZER = new ClassicAnalyzer(Version.LUCENE_45);
1✔
42
    /** A numeric range "a-b" is only expanded when {@code b - a} is strictly below this value. */
    public static final int MAX_RANGE = 20;
43
    /** Separators between individual numbered citations inside one callout (comma or semicolon). */
    public static final Pattern NUMBERED_CITATIONS_SPLIT_PATTERN = Pattern.compile("[,;]");
1✔
44
    /** The word "and" or an ampersand; used to split a callout holding two author/year citations. */
    public static final Pattern AND_WORD_PATTERN = Pattern.compile("(and)|&");
1✔
45
    /** Dash characters accepted in numeric ranges: en dash, minus sign and hyphen-minus. */
    public static final Pattern DASH_PATTERN = Pattern.compile("[–−-]");
1✔
46

47
    public class MatchResult {  
48
        private String text;
49
        private List<LayoutToken> tokens;
50
        private BibDataSet bibDataSet;
51

52
        public MatchResult(String text, List<LayoutToken> tokens, BibDataSet bibDataSet) {
1✔
53
            this.text = text;
1✔
54
            this.tokens = tokens;
1✔
55
            this.bibDataSet = bibDataSet;
1✔
56
        }
1✔
57

58
        public String getText() {
59
            return text;
1✔
60
        }
61

62
        public List<LayoutToken> getTokens() {
63
            return tokens;
×
64
        }
65

66
        public BibDataSet getBibDataSet() {
67
            return bibDataSet;
1✔
68
        }
69
    }
70

71

72
    /**
     * Function returning its String argument unchanged; passed as the second constructor
     * argument of both LuceneIndexMatcher instances created by this class.
     */
    public static final Function<String, Object> IDENTITY = new Function<String, Object>() {
1✔
73
        @Override
74
        public Object apply(String s) {
75
            return s;
1✔
76
        }
77
    };
78
    // matcher whose indexed value is the reference's author string + " et al" (+ publication date)
    private final LuceneIndexMatcher<BibDataSet, String> authorMatcher;
79
    // matcher whose indexed value is the reference symbol (label) of each reference
    private final LuceneIndexMatcher<BibDataSet, String> labelMatcher;
80
    // counters incremented while matching (see ReferenceMarkerMatcherCounters)
    private CntManager cntManager;
81
    // reference symbols of all references given to the constructor
    private Set<String> allLabels = null;
1✔
82
    // non-empty first author surnames of all references given to the constructor
    private Set<String> allFirstAuthors = null;
1✔
83

84
    /**
     * Creates a matcher over the given bibliographical references.
     * Collects all reference symbols and non-empty first author surnames, then builds and loads
     * two Lucene-based matchers: one indexed on the author string (followed by " et al" and,
     * when present, the publication date) and one indexed on the reference symbol.
     *
     * @param bds        the references to match against; when {@code null} nothing is loaded
     * @param cntManager counter manager used to record matching statistics
     * @throws EntityMatcherException declared by the constructor; presumably raised while
     *                                loading the matchers
     */
    public ReferenceMarkerMatcher(List<BibDataSet> bds, CntManager cntManager)
            throws EntityMatcherException {
        this.allLabels = new HashSet<String>();
        this.allFirstAuthors = new HashSet<String>();
        if (bds != null) {
            // iterating an empty list is a no-op, so no separate size check is needed
            for (BibDataSet reference : bds) {
                this.allLabels.add(reference.getRefSymbol());
                String surname = reference.getResBib().getFirstAuthorSurname();
                if (surname != null && !surname.isEmpty()) {
                    this.allFirstAuthors.add(surname);
                }
            }
        }

        this.cntManager = cntManager;

        // value indexed for author-style matching: "<authors> et al[ <publication date>]"
        Function<BibDataSet, Object> authorIndexer = new Function<BibDataSet, Object>() {
            @Override
            public Object apply(BibDataSet bibDataSet) {
                BiblioItem item = bibDataSet.getResBib();
                String indexed = item.getAuthors() + " et al";
                if (item.getPublicationDate() != null) {
                    indexed = indexed + " " + item.getPublicationDate();
                }
                return indexed;
            }
        };
        authorMatcher = new LuceneIndexMatcher<>(authorIndexer, IDENTITY);
        authorMatcher.setMustMatchPercentage(1.0);
        if (bds != null) {
            authorMatcher.load(bds);
        }

        // value indexed for numbered/label matching: the reference symbol
        Function<BibDataSet, Object> labelIndexer = new Function<BibDataSet, Object>() {
            @Override
            public Object apply(BibDataSet bibDataSet) {
                return bibDataSet.getRefSymbol();
            }
        };
        labelMatcher = new LuceneIndexMatcher<>(labelIndexer, IDENTITY);
        labelMatcher.setMustMatchPercentage(1.0);
        if (bds != null) {
            labelMatcher.load(bds);
        }
    }
1✔
131

132
    public List<MatchResult> match(List<LayoutToken> refTokens) throws EntityMatcherException {
133
        cntManager.i(ReferenceMarkerMatcherCounters.INPUT_REF_STRINGS_CNT);
1✔
134
        String text = LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(LayoutTokensUtil.enrichWithNewLineInfo(refTokens)));
1✔
135

136
        if (isAuthorCitationStyle(text)) {
1✔
137
            cntManager.i(ReferenceMarkerMatcherCounters.STYLE_AUTHORS);
1✔
138
//System.out.println("STYLE_AUTHORS: " + text);    
139
            return matchAuthorCitation(text, refTokens);
1✔
140
        } else if (isNumberedCitationReference(text)) {
1✔
141
            cntManager.i(ReferenceMarkerMatcherCounters.STYLE_NUMBERED);
1✔
142
//System.out.println("STYLE_NUMBERED: " + text);            
143
            return matchNumberedCitation(text, refTokens);
1✔
144
        } else {
145
            cntManager.i(ReferenceMarkerMatcherCounters.STYLE_OTHER);
1✔
146
//System.out.println("STYLE_OTHER: " + text);   
147
//            LOGGER.info("Other style: " + text);
148
            return Collections.singletonList(new MatchResult(text, refTokens, null));
1✔
149
        }
150
    }
151

152
    /*public boolean isAuthorCitationStyle(String text) {
153
        return ( YEAR_PATTERN.matcher(text.trim()).find() || 
154
                 NUMBERED_CITATION_PATTERN.matcher(text.trim()).find() )
155
            && AUTHOR_NAME_PATTERN.matcher(text.trim()).find();
156
    }*/
157

158
    public boolean isAuthorCitationStyle(String text) {
159
        return YEAR_PATTERN.matcher(text.trim()).find() && AUTHOR_NAME_PATTERN.matcher(text.trim()).find();
1✔
160
    }
161

162
    // relaxed number matching
163
    /*public static boolean isNumberedCitationReference(String t) {
164
        return NUMBERED_CITATION_PATTERN.matcher(t.trim()).find();
165
    }*/
166

167
    // number matching for number alone or in combination with author for cases "Naze et al. [5]"
168
    public boolean isNumberedCitationReference(String t) {
169
        return NUMBERED_CITATION_PATTERN.matcher(t.trim()).matches() || 
1✔
170
                 ( NUMBERED_CITATION_PATTERN.matcher(t.trim()).find() && AUTHOR_NAME_PATTERN.matcher(t.trim()).find() );
1✔
171
    }
172

173
    // strict number matching
174
    /*public static boolean isNumberedCitationReference(String t) {
175
        return NUMBERED_CITATION_PATTERN.matcher(t.trim()).matches();
176
    }*/
177

178
    private List<MatchResult> matchNumberedCitation(String input, List<LayoutToken> refTokens) throws EntityMatcherException {
179
        List<Pair<String, List<LayoutToken>>> labels = getNumberedLabels(refTokens);
1✔
180
        List<MatchResult> results = new ArrayList<>();
1✔
181
        for (Pair<String, List<LayoutToken>> label : labels) {
1✔
182
            String text = label.a;
1✔
183
            List<LayoutToken> labelToks = label.b;
1✔
184
            List<BibDataSet> matches = labelMatcher.match(text);
1✔
185
            if (matches.size() == 1) {
1✔
186
                cntManager.i(ReferenceMarkerMatcherCounters.MATCHED_REF_MARKERS);
1✔
187
//                System.out.println("MATCHED: " + text + "\n" + matches.get(0).getRefSymbol() + "\n" + matches.get(0).getRawBib());
188

189
//                System.out.println("-----------");
190
                results.add(new MatchResult(text, labelToks, matches.get(0)));
1✔
191
            } else {
192
                cntManager.i(ReferenceMarkerMatcherCounters.UNMATCHED_REF_MARKERS);
1✔
193
                if (matches.size() != 0) {
1✔
194
                    cntManager.i(ReferenceMarkerMatcherCounters.MANY_CANDIDATES);
×
195
//                    LOGGER.info("MANY CANDIDATES: " + input + "\n" + text + "\n");
196
                    for (BibDataSet bds : matches) {
×
197
//                        LOGGER.info("  " + bds.getRawBib());
198
                    }
×
199

200
//                    LOGGER.info("----------");
201
                } else {
202
                    cntManager.i(ReferenceMarkerMatcherCounters.NO_CANDIDATES);
1✔
203
//                    LOGGER.info("NO CANDIDATES: " + text + "\n" + text);
204
//                    LOGGER.info("++++++++++++");
205
                }
206
                results.add(new MatchResult(text, labelToks, null));
1✔
207
            }
208
        }
1✔
209
        return results;
1✔
210
    }
211

212
    private static List<Pair<String, List<LayoutToken>>> getNumberedLabels(List<LayoutToken> layoutTokens) {
213
        List<List<LayoutToken>> split = LayoutTokensUtil.split(layoutTokens, NUMBERED_CITATIONS_SPLIT_PATTERN, true);
1✔
214
        List<Pair<String, List<LayoutToken>>> res = new ArrayList<>();
1✔
215
        // return [ ] or () depending on (1 - 2) or [3-5])
216
        Pair<Character, Character> wrappingSymbols = getWrappingSymbols(split.get(0));
1✔
217
        for (List<LayoutToken> s : split) {
1✔
218
            int minusPos = LayoutTokensUtil.tokenPos(s, DASH_PATTERN);
1✔
219
            if (minusPos < 0) {
1✔
220
                res.add(new Pair<>(LayoutTokensUtil.toText(s), s));
1✔
221
            } else {
222
                try {
223
                    LayoutToken minusTok = s.get(minusPos);
×
224
                    List<LayoutToken> leftNumberToks = s.subList(0, minusPos);
×
225
                    List<LayoutToken> rightNumberToks = s.subList(minusPos + 1, s.size());
×
226

227
                    Integer a;
228
                    Integer b;
229

230
                    a = Integer.valueOf(LuceneUtil.tokenizeString(ANALYZER, LayoutTokensUtil.toText(leftNumberToks)).get(0), 10);
×
231
                    b = Integer.valueOf(LuceneUtil.tokenizeString(ANALYZER, LayoutTokensUtil.toText(rightNumberToks)).get(0), 10);
×
232

233
                    if (a < b && b - a < MAX_RANGE) {
×
234
                        for (int i = a; i <= b; i++) {
×
235
                            List<LayoutToken> tokPtr;
236
                            if (i == a) {
×
237
                                tokPtr = leftNumberToks;
×
238
                            } else if (i == b) {
×
239
                                tokPtr = rightNumberToks;
×
240
                            } else {
241
                                tokPtr = Collections.singletonList(minusTok);
×
242
                            }
243

244
                            res.add(new Pair<>(wrappingSymbols.a + String.valueOf(i) + wrappingSymbols.b, tokPtr));
×
245
                        }
246
                    }
247
                } catch (Exception e) {
×
248
                    LOGGER.debug("Cannot parse citation reference range: " + s);
×
249
                }
×
250

251
            }
252
        }
1✔
253
        return res;
1✔
254
    }
255

256
    private static Pair<Character, Character> getWrappingSymbols(List<LayoutToken> layoutTokens) {
257
        for (LayoutToken t : layoutTokens) {
1✔
258
            if (LayoutTokensUtil.spaceyToken(t.t()) || LayoutTokensUtil.newLineToken(t.t())) {
1✔
259
                continue;
×
260
            }
261
            if (t.t().equals("(")) {
1✔
262
                return new Pair<>('(', ')');
×
263
            } else {
264
                return new Pair<>('[', ']');
1✔
265
            }
266
        }
267

268
        return new Pair<>('[', ']');
×
269
    }
270

271
    private List<MatchResult> matchAuthorCitation(String text, List<LayoutToken> refTokens) throws EntityMatcherException {
272
        List<Pair<String, List<LayoutToken>>> split = splitAuthors(refTokens);
1✔
273
        List<MatchResult> results = new ArrayList<>();
1✔
274

275
        for (Pair<String, List<LayoutToken>> si : split) {
1✔
276
            String c = si.a;
1✔
277
            List<LayoutToken> splitItem = si.b;
1✔
278

279
            List<BibDataSet> matches = authorMatcher.match(c);
1✔
280
            if (matches.size() == 1) {
1✔
281
                cntManager.i(ReferenceMarkerMatcherCounters.MATCHED_REF_MARKERS);
1✔
282
//System.out.println("MATCHED: " + text + "\n" + c + "\n" + matches.get(0).getRawBib());
283
                results.add(new MatchResult(c, splitItem, matches.get(0)));
1✔
284
            } else {
285
                if (matches.size() != 0) {
1✔
286
                    cntManager.i(ReferenceMarkerMatcherCounters.MANY_CANDIDATES);
1✔
287
                    List<BibDataSet> filtered = postFilterMatches(c, matches);
1✔
288
                    if (filtered.size() == 1) {
1✔
289
                        results.add(new MatchResult(c, splitItem, filtered.get(0)));
1✔
290
                        cntManager.i(ReferenceMarkerMatcherCounters.MATCHED_REF_MARKERS);
1✔
291
                        cntManager.i(ReferenceMarkerMatcherCounters.MATCHED_REF_MARKERS_AFTER_POST_FILTERING);
1✔
292
                    } else {
293
                        cntManager.i(ReferenceMarkerMatcherCounters.UNMATCHED_REF_MARKERS);
1✔
294
                        results.add(new MatchResult(c, splitItem, null));
1✔
295
                        if (filtered.size() == 0) {
1✔
296
                            cntManager.i(ReferenceMarkerMatcherCounters.NO_CANDIDATES_AFTER_POST_FILTERING);
1✔
297
                        } else {
298
                            cntManager.i(ReferenceMarkerMatcherCounters.MANY_CANDIDATES_AFTER_POST_FILTERING);
×
299
                            //LOGGER.info("SEVERAL MATCHED REF CANDIDATES: " + text + "\n-----\n" + c + "\n");
300
                            /*for (BibDataSet bds : matches) {
301
                                LOGGER.info("+++++");
302
                                LOGGER.info("  " + bds.getRawBib());
303
                            }*/
304
                        }
305
                    }
306
                } else {
1✔
307
                    results.add(new MatchResult(c, splitItem, null));
1✔
308
                    cntManager.i(ReferenceMarkerMatcherCounters.NO_CANDIDATES);
1✔
309
                    //LOGGER.info("NO MATCHED REF CANDIDATES: " + text + "\n" + c);
310
                    //LOGGER.info("++++++++++++");
311
                }
312
            }
313
        }
1✔
314

315
        return results;
1✔
316
    }
317

318
    // splitting into individual citation references strings like in:
319
    // Kuwajima et al., 1985; Creighton, 1990; Ptitsyn et al., 1990;
320
    private static List<Pair<String, List<LayoutToken>>> splitAuthors(List<LayoutToken> toks) {
321
        List<List<LayoutToken>> split = LayoutTokensUtil.split(toks, AUTHOR_SEPARATOR_PATTERN, true);
1✔
322
        List<Pair<String, List<LayoutToken>>> result = new ArrayList<>();
1✔
323

324
        for (List<LayoutToken> splitTokens : split) {
1✔
325
            //cases like: Khechinashvili et al. (1973) and Privalov (1979)
326
            String text = LayoutTokensUtil.toText(splitTokens);
1✔
327
            int matchCount = matchCount(text, YEAR_PATTERN_WITH_LOOK_AROUND);
1✔
328
            if (matchCount == 2 && text.contains(" and ")) {
1✔
329
                for (List<LayoutToken> ys : LayoutTokensUtil.split(splitTokens, AND_WORD_PATTERN, true)) {
1✔
330
                    result.add(new Pair<>(LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(ys)), ys));
1✔
331
                }
1✔
332
            } else if (matchCount > 1) {
1✔
333
                List<List<LayoutToken>> yearSplit = LayoutTokensUtil.split(splitTokens, YEAR_PATTERN, true, false);
×
334
                List<List<LayoutToken>> yearSplitWithLeftOver = LayoutTokensUtil.split(splitTokens, YEAR_PATTERN, true, true);
×
335
                // do we have a leftover to be added?
336
                List<LayoutToken> leftover = null;
×
337
                if (yearSplit.size() < yearSplitWithLeftOver.size()) {
×
338
                    leftover = yearSplitWithLeftOver.get(yearSplitWithLeftOver.size()-1);
×
339
                }
340
                if (yearSplit.isEmpty()) {
×
341
                    result.add(new Pair<>(LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(splitTokens)), splitTokens));
×
342
                } else {
343
                    if (matchCount(splitTokens, AUTHOR_NAME_PATTERN) == 1) {
×
344
                        // cases like Grafton et al. 1995, 1998;
345
                        // the idea is that we produce as many labels as we have year.
346
                        //E.g. "Grafton et al. 1995, 1998;" will become two pairs:
347
                        // 1) ("Grafton et al. 1995", tokens_of("Grafton et al. 1995"))
348
                        // 2) ("Grafton et al. 1998", tokens_of("1998"))
349
                        // this method will allow to mark two citations in a non-overlapping manner
350

351
                        List<LayoutToken> firstYearSplitItem;
352
                        firstYearSplitItem = yearSplit.get(0);
×
353
                        result.add(new Pair<>(LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(firstYearSplitItem)), firstYearSplitItem));
×
354

355
                        List<LayoutToken> excludedYearToks = firstYearSplitItem.subList(0, firstYearSplitItem.size() - 1);
×
356
                        String authorName = LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(excludedYearToks));
×
357

358
                        for (int i = 1; i < yearSplit.size(); i++) {
×
359
                            List<LayoutToken> toksI = yearSplit.get(i);
×
360
                            if (i == yearSplit.size()-1 && leftover != null) {
×
361
                                List<LayoutToken> lastSegmentTokens = toksI.subList(toksI.size() - 1, toksI.size());
×
362
                                lastSegmentTokens.addAll(leftover);
×
363
                                result.add(new Pair<>(authorName + " " + LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(toksI)) + LayoutTokensUtil.toText(leftover), 
×
364
                                    lastSegmentTokens));
365
                            } else {
×
366
                                result.add(new Pair<>(authorName + " " + LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(toksI)), 
×
367
                                    toksI.subList(toksI.size() - 1, toksI.size())));
×
368
                            }
369
                        }
370
                    } else {
×
371
                        // case when two authors still appear
372
                        for(int k=0; k<yearSplit.size(); k++) {
×
373
                            List<LayoutToken> item = yearSplit.get(k);
×
374
                            if (k == yearSplit.size()-1 && leftover != null) {
×
375
                                List<LayoutToken> lastSegmentTokens = item;
×
376
                                lastSegmentTokens.addAll(leftover);
×
377
                                result.add(new Pair<>(LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(lastSegmentTokens)), lastSegmentTokens));
×
378
                            } else
×
379
                                result.add(new Pair<>(LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(item)), item));
×
380
                        }
381
                    }
382
                }
383
            } else {
×
384
                result.add(new Pair<>(LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(splitTokens)), splitTokens));
1✔
385
            }
386
        }
1✔
387
        return result;
1✔
388
    }
389

390
    private static int matchCount(String s, Pattern p) {
391
        Matcher m = p.matcher(s);
1✔
392
        int cnt = 0;
1✔
393
        while (m.find()) {
1✔
394
            cnt++;
1✔
395
        }
396
        return cnt;
1✔
397
    }
398

399
    private static int matchCount(List<LayoutToken> toks, Pattern p) {
400
        return matchCount(LayoutTokensUtil.toText(toks), p);
×
401
    }
402

403
    //if we match more than 1 citation based on name, then we leave only those citations that have author name first
404
    private List<BibDataSet> postFilterMatches(String c, List<BibDataSet> matches) {
405
        if (c.toLowerCase().contains("et al") || c.toLowerCase().contains(" and ")) {
1✔
406
            String[] sp = c.trim().split(" ");
1✔
407
            //callouts often include parentheses as seen in https://grobid.readthedocs.io/en/latest/training/fulltext/
408
            final String author = sp[0].replaceAll("[\\(\\[]", "").toLowerCase();
1✔
409
            ArrayList<BibDataSet> bibDataSets = Lists.newArrayList(Iterables.filter(matches, new Predicate<BibDataSet>() {
1✔
410
                @Override
411
                public boolean apply(BibDataSet bibDataSet) {
412
                    // first author last name formatted raw bib
413
                    return bibDataSet.getRawBib().trim().toLowerCase().startsWith(author);
1✔
414
                }
415
            }));
416

417
            if (bibDataSets.size() == 1) {
1✔
418
                return bibDataSets;
1✔
419
            }
420

421
            bibDataSets = Lists.newArrayList(Iterables.filter(matches, new Predicate<BibDataSet>() {
×
422
                @Override
423
                public boolean apply(BibDataSet bibDataSet) {
424
                    BiblioItem resBib = bibDataSet.getResBib();
×
425
                    if (resBib == null)
×
426
                        return false;
×
427
                    String firstAuthorLastName = resBib.getFirstAuthorSurname();
×
428
                    if (firstAuthorLastName == null)
×
429
                        return false;
×
430
                    firstAuthorLastName = firstAuthorLastName.toLowerCase();
×
431
                    // first author forename last name formatted raw bib
432
                    return firstAuthorLastName.equals(author);
×
433
                }
434
            }));
435

436
            if (bibDataSets.size() <= 1) {
×
437
                return bibDataSets;
×
438
            }
439

440
            //cases like c = "Smith et al, 2015" and Bds = <"Smith, Hoffmann, 2015", "Smith, 2015"> -- should prefer first one
441
            return Lists.newArrayList(Iterables.filter(bibDataSets, new Predicate<BibDataSet>() {
×
442
                @Override
443
                public boolean apply(BibDataSet bibDataSet) {
444
                    return (bibDataSet.getResBib().getFullAuthors() != null && bibDataSet.getResBib().getFullAuthors().size() > 1);
×
445
                }
446
            }));
447
        } else {
448
            //cases like c = "Smith, 2015" and Bds = <"Smith, Hoffmann, 2015", "Smith, 2015"> -- should prefer second one
449
            return Lists.newArrayList(Iterables.filter(matches, new Predicate<BibDataSet>() {
1✔
450
                @Override
451
                public boolean apply(BibDataSet bibDataSet) {
452
                    return bibDataSet.getResBib().getFullAuthors() != null && bibDataSet.getResBib().getFullAuthors().size() == 1;
1✔
453
                }
454
            }));
455
        }
456
    }
457

458
    /** 
459
     * Return true if the text is a known label from the bibliographical reference list
460
     */
461
    public boolean isKnownLabel(String text) {
462
        if ((allLabels != null) && (allLabels.contains(text.trim())))
1✔
463
            return true;
1✔
464
        else
465
            return false;
1✔
466
    }
467

468
    /**
469
     * Return true if the text is a known first author from the bibliographical reference list
470
     */
471
    public boolean isKnownFirstAuthor(String text) {
472
        if ( (allFirstAuthors != null) && (allFirstAuthors.contains(text.trim())) )
1✔
473
            return true;
1✔
474
        else 
475
            return false;
1✔
476
    }
477

478
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc