• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ljacqu / wordeval / 14540577447

18 Apr 2025 07:23PM UTC coverage: 51.439% (-6.2%) from 57.611%
14540577447

push

github

ljacqu
Remove EvaluationResult type param from all Evaluator interfaces

239 of 546 branches covered (43.77%)

16 of 17 new or added lines in 16 files covered. (94.12%)

193 existing lines in 17 files now uncovered.

679 of 1320 relevant lines covered (51.44%)

3.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.37
/src/main/java/ch/jalu/wordeval/evaluators/impl/RepeatedSegment.java
1
package ch.jalu.wordeval.evaluators.impl;
2

3
import ch.jalu.wordeval.dictionary.Word;
4
import ch.jalu.wordeval.evaluators.WordEvaluator;
5
import ch.jalu.wordeval.evaluators.result.WordWithKeyAndScore;
6
import com.google.common.collect.ArrayListMultimap;
7
import com.google.common.collect.ListMultimap;
8
import lombok.Getter;
9

10
import java.util.ArrayList;
11
import java.util.Comparator;
12
import java.util.HashMap;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Objects;
17
import java.util.Set;
18
import java.util.stream.Collectors;
19

20
/**
21
 * Finds segments in words that are repeated multiple times,
22
 * e.g. 3x "est" in af. "geestestoestand".
23
 */
24
public class RepeatedSegment implements WordEvaluator {
2✔
25

26
  @Getter
6✔
27
  private final List<WordWithKeyAndScore> results = new ArrayList<>();
28

29
  @Override
30
  public void evaluate(Word wordObject) {
31
    String word = wordObject.getLowercase();
3✔
32
    Map<String, Integer> results = new NgramGenerator(word).getResults();
6✔
33
    removeNgramSubsets(results);
2✔
34
    results.forEach((ngram, count) -> this.results.add(new WordWithKeyAndScore(wordObject, ngram, count)));
17✔
35
  }
1✔
36

37
  /**
38
   * Removes "subset" results that are covered by larger results. For example, processing the word
39
   * "geestestoestand" will yield the pairs (3, est), (3, es), (3, st). The last two are "contained"
40
   * in the first and so are removed.
41
   *
42
   * @param results the result to trim
43
   */
44
  private static void removeNgramSubsets(Map<String, Integer> results) {
45
    Set<String> subsets = new HashSet<>();
4✔
46
    for (Map.Entry<String, Integer> entry : results.entrySet()) {
11✔
47
      Integer count = entry.getValue();
4✔
48
      createNgrams(entry.getKey()).stream()
8✔
49
        .filter(subset -> Objects.equals(count, results.get(subset)))
9✔
50
        .forEach(subsets::add);
4✔
51
    }
1✔
52
    subsets.forEach(results::remove);
7✔
53
  }
1✔
54

55
  /**
56
   * Creates all possible n-grams for the given word.
57
   *
58
   * @param word the word to create n-grams for
59
   * @return constructed of n-grams
60
   */
61
  private static List<String> createNgrams(String word) {
62
    List<String> ngrams = new ArrayList<>();
4✔
63
    for (int start = 0; start < word.length(); ++start) {
8✔
64
      // need to adjust end if start == 0 or else we will also include the entire word
65
      int end = start == 0 ? word.length() - 1 : word.length();
10✔
66
      for ( ; end > start; --end) {
5✔
67
        ngrams.add(word.substring(start, end));
7✔
68
      }
69
    }
70
    return ngrams;
2✔
71
  }
72

73
  @Override
74
  public ListMultimap<Object, Object> getTopResults(int topScores, int maxLimit) {
75
    // todo: Sort better, considering the key length.
UNCOV
76
    List<WordWithKeyAndScore> sortedResult = results.stream()
×
UNCOV
77
        .sorted(Comparator.comparing(WordWithKeyAndScore::getScore).reversed())
×
UNCOV
78
        .toList();
×
79

UNCOV
80
    Set<Integer> uniqueValues = new HashSet<>();
×
UNCOV
81
    ListMultimap<Object, Object> filteredResults = ArrayListMultimap.create();
×
UNCOV
82
    for (WordWithKeyAndScore word : sortedResult) {
×
UNCOV
83
      if (uniqueValues.add(word.getScore()) && uniqueValues.size() > topScores) {
×
UNCOV
84
        break;
×
85
      }
UNCOV
86
      filteredResults.put(word.getScore(), word.getWord().getRaw() + " (" + word.getKey() + ")");
×
UNCOV
87
      if (filteredResults.size() >= maxLimit) {
×
UNCOV
88
        break;
×
89
      }
UNCOV
90
    }
×
91

UNCOV
92
    return filteredResults;
×
93
  }
94

95
  /**
96
   * Counts all n-grams of a word.
97
   */
98
  private static final class NgramGenerator {
99

100
    private final String word;
101
    private final int maxNgramSize;
102
    private final Map<String, Integer> ngramCount;
103

104
    public NgramGenerator(String word) {
2✔
105
      this.word = word;
3✔
106
      this.maxNgramSize = word.length() / 2;
6✔
107
      this.ngramCount = new HashMap<>();
5✔
108
      countNgrams();
2✔
109
    }
1✔
110

111
    /**
112
     * Returns all n-grams with multiple occurrences.
113
     *
114
     * @return collection of n-grams occurring multiple times (ngram -> count)
115
     */
116
    public Map<String, Integer> getResults() {
117
      return ngramCount.entrySet().stream()
7✔
118
        .peek(this::adjustCount)
2✔
119
        .filter(entry -> entry.getValue() > 1)
13✔
120
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
3✔
121
    }
122

123
    private void countNgrams() {
124
      for (int i = 0; i <= word.length() - 2; ++i) {
11✔
125
        createNGramsAtOffset(i);
3✔
126
      }
127
    }
1✔
128

129
    private void createNGramsAtOffset(int start) {
130
      int end = Math.min(word.length(), start + maxNgramSize);
9✔
131
      while (end - start >= 2) {
5✔
132
        String ngram = word.substring(start, end);
6✔
133
        int count = nullToZero(ngramCount.get(ngram));
7✔
134
        ngramCount.put(ngram, ++count);
8✔
135
        --end;
1✔
136
      }
1✔
137
    }
1✔
138

139
    /**
140
     * Adjusts the count of an n-gram to ensure that it really occurs as many times as counted.
141
     * For instance, in "Mississippi" the initial count of "issi" is 2 but they overlap, so it
142
     * needs to be corrected to 1.
143
     *
144
     * @param entry the entry to adjust
145
     */
146
    private void adjustCount(Map.Entry<String, Integer> entry) {
147
      if (entry.getValue() > 1) {
6✔
148
        // int division result -> gets ceil'd automatically
149
        int lengthDiff = (word.length() - word.replaceAll(entry.getKey(), "").length())
13✔
150
            / entry.getKey().length();
5✔
151
        if (lengthDiff != entry.getValue()) {
6✔
152
          // May still not be correct...
153
          entry.setValue(entry.getValue() - 1);
10✔
154
        }
155
      }
156
    }
1✔
157

158
    private static int nullToZero(Integer i) {
159
      return i == null ? 0 : i;
7✔
160
    }
161
  }
162
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc