• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pkiraly / metadata-qa-marc / #1527

22 Aug 2025 02:21PM UTC coverage: 90.345%. Remained the same
#1527

push

pkiraly
Improve timeline handling

5191 of 6416 new or added lines in 219 files covered. (80.91%)

886 existing lines in 78 files now uncovered.

36717 of 40641 relevant lines covered (90.34%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.4
/src/main/java/de/gwdg/metadataqa/marc/definition/structure/ControlfieldPositionDefinition.java
1
package de.gwdg.metadataqa.marc.definition.structure;
2

3
import de.gwdg.metadataqa.marc.EncodedValue;
4
import de.gwdg.metadataqa.marc.definition.FRBRFunction;
5
import de.gwdg.metadataqa.marc.definition.general.codelist.CodeList;
6
import de.gwdg.metadataqa.marc.definition.general.parser.SubfieldContentParser;
7
import de.gwdg.metadataqa.marc.definition.general.validator.SubfieldValidator;
8
import org.apache.commons.lang3.StringUtils;
9

10
import java.io.Serializable;
11
import java.util.ArrayList;
12
import java.util.List;
13
import java.util.regex.Matcher;
14
import java.util.regex.Pattern;
15

16
/**
17
 *
18
 * @author Péter Király <peter.kiraly at gwdg.de>
19
 */
20
public class ControlfieldPositionDefinition implements Serializable {
21

22
  protected static final Pattern TRIMMABLE = Pattern.compile("^[^ ]+ +$");
1✔
23
  private static final long serialVersionUID = 1094865179514850215L;
24

25
  protected String id;
26
  protected String label;
27
  protected String bibframeTag;
28
  protected String mqTag;
29
  protected int positionStart;
30
  protected int positionEnd;
31
  protected boolean hasCodelist = true;
1✔
32
  protected List<EncodedValue> codes = new ArrayList<>();
1✔
33
  protected List<EncodedValue> historicalCodes;
34
  /**
35
   * Represents a list of valid codes extracted from the codes list. It serves as a cache for the list of valid codes
36
   * which aren't regex patterns, but simple string codes.
37
   */
38
  protected List<String> validCodes = new ArrayList<>();
1✔
39
  /**
40
   * Used in case the codes are separately defined in some code list. Used mostly if the list of codes would otherwise
41
   * be frequently repeated in the definition.
42
   */
43
  protected CodeList codeList;
44
  protected ControlfieldPositionDefinition codeListReference;
45

46
  protected int unitLength = -1;
1✔
47
  protected boolean repeatableContent = false;
1✔
48
  protected String defaultCode;
49
  protected String descriptionUrl;
50
  protected SubfieldContentParser parser;
51
  protected List<FRBRFunction> functions;
52
  private SubfieldValidator validator;
53

54
  public ControlfieldPositionDefinition() {}
1✔
55

56
  public ControlfieldPositionDefinition(String label, int positionStart, int positionEnd) {
1✔
57
    this.label = label;
1✔
58
    this.positionStart = positionStart;
1✔
59
    this.positionEnd = positionEnd;
1✔
60
  }
1✔
61

62
  public ControlfieldPositionDefinition(String label, int positionStart, int positionEnd,
63
                                        List<EncodedValue> codes) {
64
    this(label, positionStart, positionEnd);
1✔
65
    this.codes = codes;
1✔
66
    extractValidCodes();
1✔
67
  }
1✔
68

69
  public ControlfieldPositionDefinition setCodes(List<EncodedValue> codes) {
70
    this.codes = codes;
1✔
71
    extractValidCodes();
1✔
72
    return this;
1✔
73
  }
74

75
  public String getLabel() {
76
    return label;
1✔
77
  }
78

79
  public String getBibframeTag() {
UNCOV
80
    return bibframeTag;
×
81
  }
82

83
  public ControlfieldPositionDefinition setBibframeTag(String bibframeTag) {
UNCOV
84
    this.bibframeTag = bibframeTag;
×
UNCOV
85
    return this;
×
86
  }
87

88
  public String getMqTag() {
89
    return mqTag;
1✔
90
  }
91

92
  public ControlfieldPositionDefinition setMqTag(String mqTag) {
93
    this.mqTag = mqTag;
1✔
94
    return this;
1✔
95
  }
96

97
  public int getPositionStart() {
98
    return positionStart;
1✔
99
  }
100

101
  public int getPositionEnd() {
102
    return positionEnd;
1✔
103
  }
104

105
  public List<EncodedValue> getCodes() {
106
    return codes;
1✔
107
  }
108

109
  public int getUnitLength() {
110
    return unitLength;
1✔
111
  }
112

113
  public String getId() {
114
    return id;
1✔
115
  }
116

117
  public ControlfieldPositionDefinition setId(String id) {
118
    this.id = id;
1✔
119
    return this;
1✔
120
  }
121

122
  public ControlfieldPositionDefinition setUnitLength(int unitLength) {
123
    this.unitLength = unitLength;
1✔
124
    return this;
1✔
125
  }
126

127
  public boolean isRepeatableContent() {
128
    return repeatableContent;
1✔
129
  }
130

131
  public ControlfieldPositionDefinition setRepeatableContent(boolean repeatableContent) {
132
    this.repeatableContent = repeatableContent;
1✔
133
    return this;
1✔
134
  }
135

136
  public String getDefaultCode() {
UNCOV
137
    return defaultCode;
×
138
  }
139

140
  public ControlfieldPositionDefinition setCodeList(CodeList codeList) {
141
    this.codeList = codeList;
1✔
142
    return this;
1✔
143
  }
144

145
  public CodeList getCodeList() {
146
    return codeList;
×
147
  }
148

149

150
  public ControlfieldPositionDefinition setDefaultCode(String defaultCode) {
UNCOV
151
    this.defaultCode = defaultCode;
×
UNCOV
152
    return this;
×
153
  }
154

155
  public String getDescriptionUrl() {
156
    return descriptionUrl;
1✔
157
  }
158

159
  public boolean validate(String code) {
160
    // Blanks should probably be identified somehow differently
161
    // In the avram-unimarc schema, they are represented as a ' ' character, but in the records they can also be '#' or
162
    // '-', or even '^' OR EVEN '|' for some reason
163

164
    // The codes could also be validated case insensitively
165
    // Get the regex pattern if it exists. The current implementation assumes at most one regex pattern per code,
166
    // and it also assumes that the regex pattern is a valid regex pattern
167

168
    boolean regexValid = validateRegexPattern(code);
1✔
169

170
    if (!regexValid) {
1✔
NEW
171
      return false;
×
172
    }
173

174
    // Now check if the code is valid
175
    if (codes == null || codes.isEmpty()) {
1✔
176
      return true;
1✔
177
    }
178

179
    if (repeatableContent) {
1✔
180
      return validateRepeatableCode(code);
1✔
181
    }
182

183
    return validateNonRepeatableCode(code);
1✔
184
  }
185

186
  private boolean validateRegexPattern(String code) {
187
    EncodedValue regexPattern = codes.stream().filter(EncodedValue::isRegex).findFirst().orElse(null);
1✔
188

189
    if (regexPattern == null) {
1✔
190
      return true;
1✔
191
    }
192

193
    // Create a pattern from the regex pattern
194
    // The pattern should be case-insensitive
195

NEW
196
    Pattern pattern = Pattern.compile(regexPattern.getCode(), Pattern.CASE_INSENSITIVE);
×
197

198
    // Check if the code matches the pattern
NEW
199
    Matcher patternMatcher = pattern.matcher(code);
×
200

201
    // There is no checking for groups, so that should be implemented as well
202
    // TODO implement group checking
203

NEW
204
    return patternMatcher.matches();
×
205
  }
206

207
  /**
208
   * Validates repeatable codes. The method assumes that the code is repeatable and that all codes occur at equal
209
   * distances from each other.
210
   * In other words, if the unitLength is 3, then only the substring at positions divisible by 3 are checked.
211
   * @param code The code to validate
212
   * @return True if the code is valid, false otherwise
213
   */
214
  private boolean validateRepeatableCode(String code) {
215
    for (int i = 0; i < code.length(); i += unitLength) {
1✔
216
      String unit = code.substring(i, i + unitLength);
1✔
217
      if (!validCodes.contains(unit)) {
1✔
218
        return false;
1✔
219
      }
220
    }
221
    return true;
1✔
222
  }
223

224
  private boolean validateNonRepeatableCode(String code) {
225
    return validCodes.stream()
1✔
226
      .anyMatch(e -> e.equals(code));
1✔
227
  }
228

229
  public String resolve(String inputCode) {
230
    if (codes == null && codeList == null) {
1✔
NEW
231
      return inputCode;
×
232
    }
233

234
    if (repeatableContent) {
1✔
235
      inputCode = resolveRepeatable(inputCode);
1✔
236
    } else {
237
      inputCode = resolveSingleCode(inputCode);
1✔
238
    }
239

240
    return inputCode;
1✔
241
  }
242

243
  private String resolveRepeatable(String inputCode) {
244
    List<String> units = new ArrayList<>();
1✔
245
    for (int i=0; i < inputCode.length(); i += unitLength) {
1✔
246
      String unit = inputCode.substring(i, i+unitLength);
1✔
247
      if (!units.contains(unit))
1✔
248
        units.add(unit);
1✔
249
    }
250
    List<String> resolved = new ArrayList<>();
1✔
251
    for (String unit : units)
1✔
252
      resolved.add(resolveSingleCode(unit));
1✔
253

254
    inputCode = StringUtils.join(resolved, ", ");
1✔
255
    return inputCode;
1✔
256
  }
257

258
  private String resolveSingleCode(String inputCode) {
259
    if (codeList != null) {
1✔
260
      if (inputCode.length() > 1 && TRIMMABLE.matcher(inputCode).matches()) {
1✔
261
        String trimmed = inputCode.trim();
1✔
262
        if (codeList.isValid(trimmed))
1✔
263
          return codeList.getCode(trimmed).getLabel();
1✔
264
      }
265
      if (codeList.isValid(inputCode.trim()))
1✔
266
        return codeList.getCode(inputCode.trim()).getLabel();
1✔
267
    }
268

269
    if (codes != null) {
1✔
270
      for (EncodedValue code : codes)
1✔
271
        if (code.getCode().equals(inputCode))
1✔
272
          return code.getLabel();
1✔
273
    }
274

275
    return inputCode;
1✔
276
  }
277

278
  protected void extractValidCodes() {
279
    if (codes == null) {
1✔
UNCOV
280
      return;
×
281
    }
282
    for (EncodedValue code : codes) {
1✔
283
      if (!code.isRegex()) {
1✔
284
        validCodes.add(code.getCode());
1✔
285
      }
286
    }
1✔
287
  }
1✔
288

289
  public List<String> getValidCodes() {
UNCOV
290
    return validCodes;
×
291
  }
292

293
  public String formatPositon() {
294
    return (positionStart == positionEnd - 1)
1✔
295
      ? String.format("%02d", positionStart)
1✔
296
      : String.format("%02d-%02d", positionStart, positionEnd-1);
1✔
297
  }
298

299
  public String getControlField() {
300
    // The original implementation used to be quite closely tied to the original MARC21-in-code representation.
301
    // For now, I'm going to resort to a hacky approach where we use the ID of the control field
302
    // TODO rethink and reimplement this
303

304
    if (id.toLowerCase().startsWith("leader"))
1✔
305
      return "Leader";
1✔
306

307
    return id.substring(0, 3);
1✔
308
  }
309

310
  public EncodedValue getCode(String otherCode) {
311
    for (EncodedValue code : codes)
1✔
312
      if (code.getCode().equals(otherCode))
1✔
313
        return code;
1✔
314
      else if (code.isRange() && code.getRange().isValid(otherCode))
1✔
UNCOV
315
        return code;
×
316

UNCOV
317
    return null;
×
318
  }
319

320
  public boolean isHistoricalCode(String inputCode) {
321
    if (historicalCodes != null && !historicalCodes.isEmpty())
1✔
322
      for (EncodedValue historicalCode : historicalCodes)
1✔
323
        if (historicalCode.getCode().equals(inputCode))
1✔
324
          return true;
1✔
325
    return false;
1✔
326
  }
327

328
  public String getPath() {
329
    return getPath(true);
1✔
330
  }
331

332
  public String getPath(boolean showId) {
333
    if (showId)
1✔
334
      return String.format("%s/%s (%s)", getControlField(), formatPositon(), getId());
1✔
335
    else
336
      return String.format("%s/%s", getControlField(), formatPositon());
1✔
337
  }
338

339
  public boolean hasParser() {
340
    return parser != null;
1✔
341
  }
342

343
  public SubfieldContentParser getParser() {
344
    return parser;
1✔
345
  }
346

347
  public List<EncodedValue> getHistoricalCodes() {
UNCOV
348
    return historicalCodes;
×
349
  }
350

351
  public List<FRBRFunction> getFrbrFunctions() {
352
    return functions;
1✔
353
  }
354

355
  public SubfieldValidator getValidator() {
UNCOV
356
    return validator;
×
357
  }
358

359
  public ControlfieldPositionDefinition setValidator(SubfieldValidator validator) {
360
    this.validator = validator;
1✔
361
    return this;
1✔
362
  }
363

364
  public boolean hasCodelist() {
UNCOV
365
    return hasCodelist;
×
366
  }
367

368
  public ControlfieldPositionDefinition hasCodelist(boolean hasCodelist) {
369
    this.hasCodelist = hasCodelist;
1✔
370
    return this;
1✔
371
  }
372

373
  public ControlfieldPositionDefinition getCodeListReference() {
UNCOV
374
    return codeListReference;
×
375
  }
376

377
  public ControlfieldPositionDefinition setCodeListReference(ControlfieldPositionDefinition codeListReference) {
378
    this.codeListReference = codeListReference;
1✔
379
    return this;
1✔
380
  }
381

382
  @Override
383
  public String toString() {
384
    return "ControlfieldPositionDefinition{" +
1✔
385
        "label='" + label + '\'' +
386
        ", positionStart=" + positionStart +
387
        ", positionEnd=" + positionEnd +
388
        ", codes=" + codes +
389
        ", unitLength=" + unitLength +
390
        ", repeatableContent=" + repeatableContent +
391
        ", mqTag=" + mqTag +
392
        ", id=" + id +
393
        '}';
394
  }
395
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc