• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ljacqu / wordeval / 14892089572

07 May 2025 07:54PM UTC coverage: 61.063% (+0.4%) from 60.614%
14892089572

push

github

ljacqu
Hunspell: Parse onlyInCompound flag and treat as forbidden word

399 of 710 branches covered (56.2%)

16 of 17 new or added lines in 5 files covered. (94.12%)

988 of 1618 relevant lines covered (61.06%)

3.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.59
/src/main/java/ch/jalu/wordeval/dictionary/hunspell/parser/AffixesParser.java
1
package ch.jalu.wordeval.dictionary.hunspell.parser;
2

3
import ch.jalu.wordeval.dictionary.hunspell.AffixFlagType;
4
import ch.jalu.wordeval.dictionary.hunspell.AffixType;
5
import lombok.extern.slf4j.Slf4j;
6
import org.apache.commons.lang3.StringUtils;
7
import org.springframework.stereotype.Component;
8

9
import java.util.regex.Matcher;
10
import java.util.regex.Pattern;
11
import java.util.stream.Stream;
12

13
/**
14
 * Hunspell .aff parser.
15
 */
16
@Slf4j
3✔
17
@Component
18
public class AffixesParser {
3✔
19

20
  // useful documentation: https://linux.die.net/man/4/hunspell
21

22
  // e.g. SFX V N 2
23
  private static final Pattern AFFIX_CLASS_HEADER_PATTERN =
1✔
24
      Pattern.compile("^(PFX|SFX)\\s+(\\S+)\\s+([YN])\\s+(\\d+)(\\s?#.*?)?$");
2✔
25
  // e.g. SFX V   e  ive  e
26
  private static final Pattern AFFIX_RULE_PATTERN =
2✔
27
      Pattern.compile("^(PFX|SFX)\\s+\\S+\\s+(\\S+)\\s+(\\S+)\\s+(\\S+)(.*?)?$");
2✔
28

29
  public ParsedAffixes parseAffFile(Stream<String> lines) {
30
    ParsedAffixes result = new ParsedAffixes();
4✔
31

32
    lines.forEach(line -> {
5✔
33
      line = line.trim();
3✔
34
      if (line.isEmpty() || line.startsWith("#")) {
7✔
35
        return;
1✔
36
      }
37

38
      Matcher headerMatcher = AFFIX_CLASS_HEADER_PATTERN.matcher(line);
4✔
39
      Matcher ruleMatcher = AFFIX_RULE_PATTERN.matcher(line);
4✔
40

41
      if (headerMatcher.matches()) {
3✔
42
        ParsedAffixClass affixClass = mapAffixClass(headerMatcher);
3✔
43
        result.addAffixClass(affixClass);
3✔
44
      } else if (ruleMatcher.matches()) {
4✔
45
        ParsedAffixClass.Rule rule = mapAffixRule(ruleMatcher, result.getFlagType());
5✔
46
        result.addRuleToCurrentClass(rule);
3✔
47
      } else if (line.startsWith("FLAG ")) {
5✔
48
        result.setFlagType(AffixFlagType.fromAffixFileString(line.substring("FLAG ".length())));
8✔
49
      } else if (line.startsWith("NEEDAFFIX ")) {
4✔
50
        result.setNeedAffixFlag(line.substring("NEEDAFFIX ".length()));
7✔
51
      } else if (line.startsWith("FORBIDDENWORD ")) {
4✔
52
        result.setForbiddenWordClass(line.substring("FORBIDDENWORD ".length()));
7✔
53
      } else if (line.startsWith("ONLYINCOMPOUND ")) {
4✔
54
        result.setOnlyInCompound(line.substring("ONLYINCOMPOUND ".length()));
7✔
55
      } else {
56
        handleUnknownLine(line);
3✔
57
      }
58
    });
1✔
59

60
    return result;
2✔
61
  }
62

63
  private static ParsedAffixClass mapAffixClass(Matcher headerMatcher) {
64
    ParsedAffixClass affixClass = new ParsedAffixClass();
4✔
65
    affixClass.type = AffixType.fromString(headerMatcher.group(1));
6✔
66
    affixClass.flag = headerMatcher.group(2);
5✔
67
    affixClass.crossProduct = headerMatcher.group(3).equalsIgnoreCase("Y");
7✔
68
    return affixClass;
2✔
69
  }
70

71
  private static ParsedAffixClass.Rule mapAffixRule(Matcher ruleMatcher, AffixFlagType flagType) {
72
    String strip = emptyIfZeroString(ruleMatcher.group(2));
5✔
73
    String affix = emptyIfZeroString(ruleMatcher.group(3));
5✔
74
    String condition = ruleMatcher.group(4);
4✔
75

76
    int slashIndex = affix.indexOf('/');
4✔
77
    if (slashIndex < 0) {
2✔
78
      return new ParsedAffixClass.Rule(strip, affix, condition);
7✔
79
    }
80

81
    String continuationClasses = affix.substring(slashIndex + 1);
6✔
82
    affix = emptyIfZeroString(affix.substring(0, slashIndex));
6✔
83
    return new ParsedAffixClass.Rule(strip, affix, flagType.split(continuationClasses), condition);
10✔
84
  }
85

86
  private void handleUnknownLine(String line) {
87
    // We don't support compounds for now, but let's log them as DEBUG so we can easily
88
    // find them again and be alerted to a lot of rules being skipped
89
    if (StringUtils.startsWithAny(line, "CHECKCOMPOUNDCASE", "CHECKCOMPOUNDPATTERN ", "CHECKCOMPOUNDDUP",
41!
90
        "COMPOUNDBEGIN ", "COMPOUNDMIDDLE ", "COMPOUNDEND ", "COMPOUNDPERMITFLAG ", "COMPOUNDMIN ", "COMPOUNDRULE ")) {
NEW
91
      log.debug("Skipping unsupported line: {}", line);
×
92
    } else if (StringUtils.startsWithAny(line, "BREAK ", "CHECKSHARPS", "HOME ", "ICONV ", "KEY ", "LANG ", "MAP ", "NAME ",
61!
93
        "NOSUGGEST ", "OCONV ", "REP ", "TRY ", "VERSION ", "WORDCHARS ")) {
94
      // Nothing to do: command is not relevant for this application
95
      return;
×
96
    } else if (line.startsWith("SET ")) {
4!
97
      if (!line.startsWith("SET UTF-8")) {
4!
98
        log.warn("Found unexpected encoding directive: {}", line);
×
99
      }
100
    } else {
101
      log.info("Unknown line: {}", line);
×
102
    }
103
  }
1✔
104

105
  private static String emptyIfZeroString(String s) {
106
    return "0".equals(s) ? "" : s;
8✔
107
  }
108
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc