• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pkiraly / metadata-qa-marc / #1527

22 Aug 2025 02:21PM UTC coverage: 90.345%. Remained the same
#1527

push

pkiraly
Improve timeline handling

5191 of 6416 new or added lines in 219 files covered. (80.91%)

886 existing lines in 78 files now uncovered.

36717 of 40641 relevant lines covered (90.34%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.78
/src/main/java/de/gwdg/metadataqa/marc/analysis/validator/Validator.java
1
package de.gwdg.metadataqa.marc.analysis.validator;
2

3
import de.gwdg.metadataqa.marc.dao.DataField;
4
import de.gwdg.metadataqa.marc.dao.MarcControlField;
5
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
6
import de.gwdg.metadataqa.marc.dao.record.MarcRecord;
7
import de.gwdg.metadataqa.marc.definition.Cardinality;
8
import de.gwdg.metadataqa.marc.definition.ValidatorResponse;
9
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
10
import de.gwdg.metadataqa.marc.definition.general.validator.ClassificationReferenceValidator;
11
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
12
import de.gwdg.metadataqa.marc.model.validation.ValidationError;
13
import de.gwdg.metadataqa.marc.model.validation.ValidationErrorType;
14

15
import java.util.ArrayList;
16
import java.util.HashMap;
17
import java.util.LinkedHashMap;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.function.Function;
21
import java.util.logging.Logger;
22
import java.util.stream.Collectors;
23

24
import static de.gwdg.metadataqa.marc.Utils.count;
25

26
public class Validator extends AbstractValidator {
27
  private BibliographicRecord bibliographicRecord;
28
  private List<ValidationError> parsingErrors;
29

30
  private static final Logger logger = Logger.getLogger(Validator.class.getCanonicalName());
1✔
31

32
  /**
   * Creates a validator backed by a freshly constructed {@link ValidatorConfiguration}.
   */
  public Validator() {
    this(new ValidatorConfiguration());
  }

  /**
   * Creates a validator that uses the given configuration.
   *
   * @param configuration the configuration passed on to the superclass constructor
   */
  public Validator(ValidatorConfiguration configuration) {
    super(configuration);
  }

  /**
   * Creates a validator that uses the given configuration and remembers a list of errors
   * which {@code validate} later copies into its result.
   *
   * @param configuration the configuration passed on to the superclass constructor
   * @param parsingErrors the errors to remember; may be {@code null}. The list is stored
   *                      by reference, it is not copied.
   */
  public Validator(ValidatorConfiguration configuration, List<ValidationError> parsingErrors) {
    this(configuration);
    this.parsingErrors = parsingErrors;
  }
1✔
44

45
  public boolean validate(BibliographicRecord bibliographicRecord) {
46
    this.bibliographicRecord = bibliographicRecord;
1✔
47

48
    validationErrors = new ArrayList<>();
1✔
49
    if (parsingErrors != null && !parsingErrors.isEmpty()) {
1✔
50
      validationErrors.addAll(parsingErrors);
1✔
51
    }
52

53
    if (bibliographicRecord == null) {
1✔
54
      return validationErrors.isEmpty();
1✔
55
    }
56

57
    if (!bibliographicRecord.getSchemaType().equals(SchemaType.PICA)) {
1✔
58
      validateLeader();
1✔
59
    }
60
    validateUnhandledTags();
1✔
61
    validateControlfields();
1✔
62
    validateDatafields();
1✔
63

64
    // TODO: use reflection to get all validator class
65

66
    return validationErrors.isEmpty();
1✔
67
  }
68

69
  private boolean validateLeader() {
70

71
    // If it isn't a MARC record with a leader, it's valid
72
    if (!(bibliographicRecord instanceof MarcRecord)) {
1✔
NEW
73
      return true;
×
74
    }
75

76
    MarcRecord marcRecord = (MarcRecord) bibliographicRecord;
1✔
77

78
    LeaderValidator leaderValidator = new LeaderValidator(configuration);
1✔
79
    boolean isValidComponent = leaderValidator.validate(marcRecord.getLeader());
1✔
80
    if (isValidComponent) {
1✔
81
      return true;
1✔
82
    }
83
    List<ValidationError> leaderErrors = leaderValidator.getValidationErrors();
1✔
84

85
    for (ValidationError leaderError : leaderErrors) {
1✔
86
      if (leaderError.getRecordId() == null) {
1✔
NEW
87
        leaderError.setRecordId(marcRecord.getId());
×
88
      }
89
    }
1✔
90

91

92
    validationErrors.addAll(filterErrors(leaderErrors));
1✔
93
    return false;
1✔
94
  }
95

96
  private boolean validateUnhandledTags() {
97
    List<String> unhandledTags = bibliographicRecord.getUnhandledTags();
1✔
98
    if (unhandledTags.isEmpty()) {
1✔
99
      // No unhandled tags, so the record is valid
100
      return true;
1✔
101
    }
102

103
    List<ValidationError> unhandledTagErrors;
104
    if (configuration.doSummary()) {
1✔
105
      unhandledTagErrors = getUnhandledTagErrorsSummary(unhandledTags);
1✔
106
    } else {
107
      unhandledTagErrors = getUnhandledTagErrorsDetailed(unhandledTags);
1✔
108
    }
109

110
    // These errors weren't being filtered originally, so they aren't going to be filtered now either
111
    validationErrors.addAll(unhandledTagErrors);
1✔
112

113
    // As there were unhandled tags, the record is invalid
114
    return false;
1✔
115
  }
116

117
  private boolean validateControlfields() {
118

119
    if (!(bibliographicRecord instanceof MarcRecord)) {
1✔
120
      return true;
1✔
121
    }
122

123
    MarcRecord marcRecord = (MarcRecord) bibliographicRecord;
1✔
124

125
    boolean isValidComponent = true;
1✔
126

127
    ControlFieldValidator controlFieldValidator = new ControlFieldValidator(configuration);
1✔
128
    for (MarcControlField controlField : marcRecord.getControlfields()) {
1✔
129
      if (controlField == null) {
1✔
130
        continue;
1✔
131
      }
132

133
      // If the control field is not valid, add the errors to the list of validation errors,
134
      // and set isValidComponent to false. Setting and returning isValidComponent wouldn't work as it would
135
      // only return the result of the last control field validation.
136
      boolean isValidControlField = controlFieldValidator.validate(controlField);
1✔
137
      if (!isValidControlField) {
1✔
138
        validationErrors.addAll(filterErrors(controlFieldValidator.getValidationErrors()));
1✔
139
        isValidComponent = false;
1✔
140
      }
141
    }
1✔
142
    return isValidComponent;
1✔
143
  }
144

145
  private void validateDatafields() {
146
    DataFieldValidator validator = new DataFieldValidator(configuration);
1✔
147
    Map<RepetitionDao, Integer> repetitionCounter = new HashMap<>();
1✔
148

149
    for (DataField field : bibliographicRecord.getDatafields()) {
1✔
150
      validateDatafield(validator, repetitionCounter, field);
1✔
151
    }
1✔
152

153
    validateRepeatability(repetitionCounter);
1✔
154
  }
1✔
155

156
  /**
157
   * Returns a list of validation errors for each unhandled tag.
158
   * @param unhandledTags A list of unhandled tags
159
   * @return A list of error messages
160
   */
161
  private List<ValidationError> getUnhandledTagErrorsSummary(List<String> unhandledTags) {
162
    List<ValidationError> errors = new ArrayList<>();
1✔
163
    for (String tag : unhandledTags) {
1✔
164

165
      // If the tag is ignorable or the error type is ignorable, skip it
166
      boolean shouldBeValidated = !bibliographicRecord.isIgnorableField(tag, configuration.getIgnorableFields())
1✔
167
          && (!isIgnorableType(ValidationErrorType.FIELD_UNDEFINED));
1✔
168

169
      if (shouldBeValidated) {
1✔
170
        errors.add(new ValidationError(bibliographicRecord.getId(), tag, ValidationErrorType.FIELD_UNDEFINED, tag, null));
1✔
171
      }
172
    }
1✔
173
    return errors;
1✔
174
  }
175

176
  /**
177
   * Gets a list of validation errors for each set of identical unhandled tags as well as the count of such tags,
178
   * e.g. 198 (2*)
179
   * @param unhandledTags A list of unhandled tags
180
   * @return A list of error messages
181
   */
182
  private List<ValidationError> getUnhandledTagErrorsDetailed(List<String> unhandledTags) {
183
    List<ValidationError> errors = new ArrayList<>();
1✔
184

185
    // For all unhandled tags, count the occurrences
186
    Map<String, Long> tagCounts = unhandledTags.stream()
1✔
187
        .collect(Collectors.groupingBy(Function.identity(), LinkedHashMap::new, Collectors.counting()));
1✔
188

189
    for (Map.Entry<String, Long> entry : tagCounts.entrySet()) {
1✔
190
      String tag = entry.getKey();
1✔
191
      Long count = entry.getValue();
1✔
192
      boolean shouldBeValidated = !bibliographicRecord.isIgnorableField(tag, configuration.getIgnorableFields())
1✔
193
          && !isIgnorableType(ValidationErrorType.FIELD_UNDEFINED);
1✔
194

195
      if (shouldBeValidated) {
1✔
196
        String errorMessage = (count == 1) ? tag : String.format("%s (%d*)", tag, count);
1✔
197
        errors.add(new ValidationError(bibliographicRecord.getId(), tag, ValidationErrorType.FIELD_UNDEFINED, errorMessage, null));
1✔
198
      }
199
    }
1✔
200

201
    return errors;
1✔
202
  }
203

204
  /**
205
   * Validates a datafield by the given data field validator as well as classification reference validator.
206
   */
207
  private void validateDatafield(DataFieldValidator validator,
208
                                 Map<RepetitionDao, Integer> repetitionCounter,
209
                                 DataField field) {
210

211
    boolean shouldSkipValidation = field.getDefinition() == null
1✔
212
        || bibliographicRecord.isIgnorableField(field.getTag(), configuration.getIgnorableFields());
1✔
213

214
    if (shouldSkipValidation) {
1✔
215
      return;
1✔
216
    }
217

218
    RepetitionDao dao = new RepetitionDao(field.getTagWithOccurrence(), field.getDefinition());
1✔
219
    count(dao, repetitionCounter);
1✔
220

221
    validateField(validator, field);
1✔
222
    validateClassificationReference(field);
1✔
223
  }
1✔
224

225
  /**
226
   * The first portion of datafield validation. This method validates the datafield by the given data field validator.
227
   * @param validator The data field validator
228
   * @param field The data field to validate
229
   */
230
  private void validateField(DataFieldValidator validator, DataField field) {
231
    boolean isValidField = validator.validate(field);
1✔
232
    if (!isValidField) {
1✔
233
      validationErrors.addAll(filterErrors(validator.getValidationErrors()));
1✔
234
    }
235
  }
1✔
236

237
  private void validateClassificationReference(DataField field) {
238
    ValidatorResponse validatorResponse = ClassificationReferenceValidator.validate(field);
1✔
239
    if (!validatorResponse.isValid()) {
1✔
NEW
240
      validationErrors.addAll(filterErrors(validatorResponse.getValidationErrors()));
×
241
    }
242
  }
1✔
243

244
  /**
245
   * Given the repetition counter, this method validates the repeatability of the datafields. In case a field is
246
   * non-repeatable and it is repeated, an error is added to the list of validation errors.
247
   * @param repetitionCounter A map of repetition data access objects and their counts, i.e. how many times a given
248
   *                          datafield has occurred in the record
249
   */
250
  private void validateRepeatability(Map<RepetitionDao, Integer> repetitionCounter) {
251
    if (isIgnorableType(ValidationErrorType.FIELD_NONREPEATABLE)) {
1✔
NEW
252
      return;
×
253
    }
254

255
    // Get errors for all non-repeatable fields which were repeated
256
    List<ValidationError> nonRepeatableFieldErrors = repetitionCounter.entrySet().stream()
1✔
257
        .filter(entry -> isNonRepeatableRepeated(entry.getKey(), entry.getValue()))
1✔
258
        .map(entry -> createNonRepeatableFieldError(entry.getKey(), entry.getValue()))
1✔
259
        .collect(Collectors.toList());
1✔
260

261
    validationErrors.addAll(filterErrors(nonRepeatableFieldErrors));
1✔
262
  }
1✔
263

264
  private static boolean isNonRepeatableRepeated(RepetitionDao dao, Integer count) {
265
    DataFieldDefinition fieldDefinition = dao.getFieldDefinition();
1✔
266
    return count > 1 && fieldDefinition.getCardinality().equals(Cardinality.Nonrepeatable);
1✔
267
  }
268

269
  private ValidationError createNonRepeatableFieldError(RepetitionDao dao, Integer count) {
270
    DataFieldDefinition fieldDefinition = dao.getFieldDefinition();
1✔
271
    return new ValidationError(bibliographicRecord.getId(), fieldDefinition.getExtendedTag(),
1✔
272
        ValidationErrorType.FIELD_NONREPEATABLE,
273
        // String.format("there are %d instances", count),
274
        String.format("there are multiple instances", count),
1✔
275
        fieldDefinition.getDescriptionUrl()
1✔
276
    );
277
  }
278
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc