• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pkiraly / metadata-qa-marc / #1527

22 Aug 2025 02:21PM UTC coverage: 90.345%. Remained the same
#1527

push

pkiraly
Improve timeline handling

5191 of 6416 new or added lines in 219 files covered. (80.91%)

886 existing lines in 78 files now uncovered.

36717 of 40641 relevant lines covered (90.34%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main/java/de/gwdg/metadataqa/marc/MarcFieldExtractor.java
1
package de.gwdg.metadataqa.marc;
2

3
import com.jayway.jsonpath.InvalidJsonException;
4
import de.gwdg.metadataqa.api.counter.FieldCounter;
5
import de.gwdg.metadataqa.api.interfaces.Calculator;
6
import de.gwdg.metadataqa.api.interfaces.MetricResult;
7
import de.gwdg.metadataqa.api.model.XmlFieldInstance;
8
import de.gwdg.metadataqa.api.model.selector.JsonSelector;
9
import de.gwdg.metadataqa.api.model.selector.Selector;
10
import de.gwdg.metadataqa.api.schema.Schema;
11
import de.gwdg.metadataqa.api.util.CompressionLevel;
12
import de.gwdg.metadataqa.marc.dao.Control007;
13
import de.gwdg.metadataqa.marc.dao.Control008;
14
import de.gwdg.metadataqa.marc.dao.Marc21Leader;
15
import de.gwdg.metadataqa.marc.definition.general.codelist.CodeList;
16
import de.gwdg.metadataqa.marc.definition.general.codelist.LanguageCodes;
17
import de.gwdg.metadataqa.marc.definition.general.codelist.OrganizationCodes;
18
import org.apache.commons.lang3.StringUtils;
19

20
import java.io.Serializable;
21
import java.util.ArrayList;
22
import java.util.Arrays;
23
import java.util.Collections;
24
import java.util.HashMap;
25
import java.util.LinkedHashMap;
26
import java.util.List;
27
import java.util.Map;
28
import java.util.StringTokenizer;
29
import java.util.logging.Logger;
30

31
/**
32
 *
33
 * @author Péter Király <peter.kiraly at gwdg.de>
34
 */
35
public class MarcFieldExtractor implements Calculator, Serializable {
36

UNCOV
37
  private static final Logger logger = Logger.getLogger(MarcFieldExtractor.class.getCanonicalName());
×
38

39
  public static final String CALCULATOR_NAME = "fieldExtractor";
40
  public static final String LEADER_KEY = "leader";
UNCOV
41
  private static final List<String> authorFields = Arrays.asList("100$a", "110$a", "700$a", "710$a");
×
42

43
  public static final String FIELD_NAME = "recordId";
44
  private String idPath;
45
  protected FieldCounter<List<String>> resultMap;
46
  protected Schema schema;
47
  private String recordId;
48
  private Marc21Leader leader;
49
  private Control007 x007;
50
  private Control008 x008;
51
  private Map<String, Object> duplumKeyMap;
52
  private List<String> titleWords;
53
  private List<String> authorWords;
54
  private String duplumKeyType;
55
  private List<String> dateOfPublication;
56
  private List<String> isbn;
57
  private String publisherOrDistributorNumber;
58
  private String abbreviatedNameOfPublisher;
59
  private String numberOfPart;
60
  private String nameOfPart;
61
  private String extent;
62
  private String musicalPresentationStatement;
63
  private String volumeDesignation;
64
  private String relatedParts;
65
  private List<X035aSystemControlNumber> systemControlNumbers;
66
  private Map<String, Object> oclcMap;
67
  private boolean valid;
68

UNCOV
69
  public MarcFieldExtractor() {
×
70
  }
×
71

UNCOV
72
  /**
   * Creates an extractor bound to a schema. The identifier path is taken from the
   * schema's extractable field registered under the key {@code "001"}.
   *
   * @param schema the schema whose extractable fields drive the extraction
   */
  public MarcFieldExtractor(Schema schema) {
    this.schema = schema;
    // assign the field directly: calling the overridable setIdPath() from a constructor
    // would run subclass code on a partially constructed object
    this.idPath = schema.getExtractableFields().get("001");
  }
×
76

UNCOV
77
  /**
   * Creates an extractor that has no schema and reads the record identifier from the given path.
   *
   * @param idPath the path of the record identifier
   */
  public MarcFieldExtractor(String idPath) {
    this.idPath = idPath;
  }
×
80

81
  @Override
82
  public String getCalculatorName() {
UNCOV
83
    return CALCULATOR_NAME;
×
84
  }
85

86
  public void measure(JsonSelector selector)
87
        throws InvalidJsonException {
UNCOV
88
    valid = true;
×
89
    resultMap = new FieldCounter<>();
×
90
    duplumKeyMap = null;
×
91
    recordId = null;
×
92
    leader = null;
×
93
    x007 = null;
×
94
    x008 = null;
×
95
    titleWords = null;
×
96
    authorWords = null;
×
97
    duplumKeyType = null;
×
98
    dateOfPublication = null;
×
99
    isbn = null;
×
100
    publisherOrDistributorNumber = null;
×
101
    abbreviatedNameOfPublisher = null;
×
102
    numberOfPart = null;
×
103
    nameOfPart = null;
×
104
    extent = null;
×
105
    musicalPresentationStatement = null;
×
106
    volumeDesignation = null;
×
107
    relatedParts = null;
×
108
    systemControlNumbers = null;
×
109
    oclcMap = null;
×
110

UNCOV
111
    recordId = ((List<XmlFieldInstance>) selector.get(getIdPath())).get(0).getValue();
×
112
    selector.setRecordId(recordId);
×
113
    resultMap.put(FIELD_NAME, Arrays.asList(recordId));
×
114
    if (schema != null) {
×
115
      String path;
UNCOV
116
      for (String fieldName : schema.getExtractableFields().keySet()) {
×
117
        if (!fieldName.equals(FIELD_NAME)) {
×
118
          path = schema.getExtractableFields().get(fieldName);
×
119
          List<XmlFieldInstance> instances = selector.get(path);
×
120
          List<String> values = null;
×
121
          if (!isNull(instances)) {
×
122
            values = new ArrayList<>();
×
123
            for (XmlFieldInstance instance : instances) {
×
124
              values.add(instance.getValue());
×
125
            }
×
126
            if (fieldName.equals(LEADER_KEY)) {
×
NEW
127
              leader = new Marc21Leader(values.get(0));
×
128
            }
129
          }
UNCOV
130
          resultMap.put(fieldName, values);
×
131
        }
UNCOV
132
      }
×
133
    }
UNCOV
134
    processLeader();
×
135
    process007();
×
136
    process008();
×
137
    processType();
×
138
    processTitleWords();
×
139
    processAuthorWords();
×
140
    processDateOfPublication();
×
141
    processIsbn();
×
142
    processPublisherOrDistributorNumber();
×
143
    processAbbreviatedNameOfPublisher();
×
144
    processNumberOfPart();
×
145
    processNameOfPart();
×
146
    processExtent();
×
147
    processMusicalPresentationStatement();
×
148
    processVolumeDesignation();
×
149
    processRelatedParts();
×
150
    processSystemControlNumbers();
×
151
    processOclcFields();
×
152
    createDuplumKeyMap();
×
153
  }
×
154

155
  private static boolean isNull(List<XmlFieldInstance> values) {
UNCOV
156
    return values == null
×
157
          || values.isEmpty()
×
158
          || values.get(0) == null
×
159
          || values.get(0).getValue() == null;
×
160
  }
161

162
  public String getIdPath() {
UNCOV
163
    return idPath;
×
164
  }
165

166
  public void setIdPath(String idPath) {
UNCOV
167
    this.idPath = idPath;
×
168
  }
×
169

170
  @Override
171
  public List<MetricResult> measure(Selector selector) {
NEW
172
    return Collections.emptyList();
×
173
  }
174

175
  // @Override
176
  public Map<String, ? extends Object> getResultMap() {
UNCOV
177
    return resultMap.getMap();
×
178
  }
179

180
  // @Override
181
  public Map<String, Map<String, ? extends Object>> getLabelledResultMap() {
UNCOV
182
    Map<String, Map<String, ? extends Object>> labelledResultMap = new LinkedHashMap<>();
×
183
    labelledResultMap.put(getCalculatorName(), resultMap.getMap());
×
184
    return labelledResultMap;
×
185
  }
186

187
  // @Override
188
  public String getCsv(boolean withLabel, CompressionLevel compressionLevel) {
UNCOV
189
    return resultMap.getCsv(withLabel, CompressionLevel.ZERO); // the extracted fields should never be compressed!
×
190
  }
191

192
  @Override
193
  public List<String> getHeader() {
UNCOV
194
    List<String> headers = new ArrayList<>();
×
195
    headers.add(FIELD_NAME);
×
196
    return headers;
×
197
  }
198

199
  public void processLeader() {
UNCOV
200
    if (resultMap.has(LEADER_KEY))
×
NEW
201
      leader = new Marc21Leader(resultMap.get(LEADER_KEY).get(0));
×
202
    else
UNCOV
203
      logger.severe("No leader in result map. Nr of existing vars: " + StringUtils.join(resultMap.getMap().keySet(), ", "));
×
204
  }
×
205

206
  public void process007() {
UNCOV
207
    if (resultMap.get("007") == null) {
×
208
      valid = false;
×
209
    } else {
UNCOV
210
      x007 = new Control007(resultMap.get("007").get(0));
×
211
    }
UNCOV
212
  }
×
213

214
  public void process008() {
UNCOV
215
    if (resultMap.get("008") != null 
×
216
        && StringUtils.isNotBlank(resultMap.get("008").get(0)))
×
217
      x008 = new Control008(resultMap.get("008").get(0), leader.getType());
×
218
  }
×
219

220
  private void processTitleWords() {
UNCOV
221
    titleWords = extractWords(StringUtils.join(resultMap.get("245$a"), " "), 3);
×
222
  }
×
223

224
  private void processType() {
UNCOV
225
    String typeOfRecord = leader.getByLabel("Type of record");
×
226
    String bibliographicLevel = leader.getByLabel("Bibliographic level");
×
227
    if (typeOfRecord.equals("a") && bibliographicLevel.equals("s")) {
×
228
      duplumKeyType = "p";
×
229
    } else if (bibliographicLevel.equals("d")) {
×
230
      duplumKeyType = "s";
×
231
    } else if (bibliographicLevel.equals("a") || bibliographicLevel.equals("b")) {
×
232
      duplumKeyType = "a";
×
233
    } else {
UNCOV
234
      duplumKeyType = "m";
×
235
    }
UNCOV
236
  }
×
237

238
  public List<String> extractWords(String text, int length) {
UNCOV
239
    List<String> tokens = new ArrayList<>();
×
240
    if (StringUtils.isBlank(text))
×
241
      return tokens;
×
242

UNCOV
243
    var st = new StringTokenizer(text);
×
244
    while (st.hasMoreTokens())
×
245
      tokens.add(st.nextToken());
×
246

UNCOV
247
    var max = Math.min(length, tokens.size());
×
248
    return tokens.subList(0, max);
×
249
  }
250

251
  public String getRecordId() {
UNCOV
252
    return recordId;
×
253
  }
254

255
  public Marc21Leader getLeader() {
UNCOV
256
    return leader;
×
257
  }
258

259
  public Control007 getX007() {
UNCOV
260
    return x007;
×
261
  }
262

263
  public Control008 getX008() {
UNCOV
264
    return x008;
×
265
  }
266

267
  public List<String> getTitleWords() {
UNCOV
268
    return titleWords;
×
269
  }
270

271
  public List<String> getAuthorWords() {
UNCOV
272
    return authorWords;
×
273
  }
274

275
  public String getDuplumKeyType() {
UNCOV
276
    return duplumKeyType;
×
277
  }
278

279
  public List<String> getDateOfPublication() {
UNCOV
280
    return dateOfPublication;
×
281
  }
282

283
  public List<String> getIsbn() {
UNCOV
284
    return isbn;
×
285
  }
286

287
  public String getPublisherOrDistributorNumber() {
UNCOV
288
    return publisherOrDistributorNumber;
×
289
  }
290

291
  public String getAbbreviatedNameOfPublisher() {
UNCOV
292
    return abbreviatedNameOfPublisher;
×
293
  }
294

295
  public String getNumberOfPart() {
UNCOV
296
    return numberOfPart;
×
297
  }
298

299
  public String getNameOfPart() {
UNCOV
300
    return nameOfPart;
×
301
  }
302

303
  public String getExtent() {
UNCOV
304
    return extent;
×
305
  }
306

307
  public String getMusicalPresentationStatement() {
UNCOV
308
    return musicalPresentationStatement;
×
309
  }
310

311
  public String getVolumeDesignation() {
UNCOV
312
    return volumeDesignation;
×
313
  }
314

315
  public String getRelatedParts() {
UNCOV
316
    return relatedParts;
×
317
  }
318

319
  private void processAuthorWords() {
UNCOV
320
    String author = extractAuthor();
×
321
    authorWords = extractWords(author, 3);
×
322
  }
×
323

324
  private String extractAuthor() {
UNCOV
325
    String author = null;
×
326
    for (String field : authorFields) {
×
327
      Object value = resultMap.get(field);
×
328
      String stringValue;
UNCOV
329
      if (value instanceof List) {
×
330
        stringValue = StringUtils.join((List)value, " ");
×
331
      } else {
UNCOV
332
        stringValue = (String)value;
×
333
      }
UNCOV
334
      if (StringUtils.isNotBlank(stringValue)) {
×
335
        author = stringValue;
×
336
        break;
×
337
      }
UNCOV
338
    }
×
339
    return author;
×
340
  }
341

342
  private void processDateOfPublication() {
UNCOV
343
    dateOfPublication = resultMap.get("260$c");
×
344
  }
×
345

346
  private void processIsbn() {
UNCOV
347
    isbn = resultMap.get("020$a");
×
348
  }
×
349

350
  private void processPublisherOrDistributorNumber() {
UNCOV
351
    publisherOrDistributorNumber = duplumKeyType.equals("m")
×
352
        ? null : StringUtils.join(resultMap.get("028$a"), "; ");
×
353
  }
×
354

355
  private void processAbbreviatedNameOfPublisher() {
UNCOV
356
    abbreviatedNameOfPublisher = StringUtils.join(resultMap.get("060$b"), "; ");
×
357
  }
×
358

359
  private void processNumberOfPart() {
UNCOV
360
    numberOfPart = StringUtils.join(resultMap.get("245$n"), "; ");
×
361
  }
×
362

363
  private void processNameOfPart() {
UNCOV
364
    nameOfPart = StringUtils.join(resultMap.get("245$p"), "; ");
×
365
  }
×
366

367
  private void processExtent() {
UNCOV
368
    extent = StringUtils.join(resultMap.get("300$a"), "; ");
×
369
  }
×
370

371
  private void processMusicalPresentationStatement() {
UNCOV
372
    musicalPresentationStatement = StringUtils.join(resultMap.get("254$a"), "; ");
×
373
  }
×
374

375
  private void processVolumeDesignation() {
UNCOV
376
    volumeDesignation = StringUtils.join(resultMap.get("490$v"), "; ");
×
377
  }
×
378

379
  private void processRelatedParts() {
UNCOV
380
    relatedParts = StringUtils.join(resultMap.get("773$g"), "; ");
×
381
  }
×
382

383
  private void processOclcFields() {
UNCOV
384
    oclcMap = new LinkedHashMap<>();
×
385
    oclcMap.put("oclcLibraryIdentifier", resolve(resultMap.get("029$a"), OrganizationCodes.getInstance()));
×
386
    oclcMap.put("otherSystemControlNumber", resultMap.get("029$b"));
×
387
    oclcMap.put("catalogingAgency", resolve(resultMap.get("040$a"), OrganizationCodes.getInstance()));
×
388
    oclcMap.put("languageOfCataloging", resolve(resultMap.get("040$b"), LanguageCodes.getInstance()));
×
389
    oclcMap.put("transcribingAgency", resolve(resultMap.get("040$c"), OrganizationCodes.getInstance()));
×
390
    oclcMap.put("modifyingAgency", resolve(resultMap.get("040$d"), OrganizationCodes.getInstance()));
×
391
    oclcMap.put("topicalTerm", resultMap.get("650$a"));
×
392
    oclcMap.put("manifestId", resultMap.get("911$9"));
×
393
    oclcMap.put("workId", resultMap.get("912$9"));
×
394
    oclcMap.put("placeOfPublication", resultMap.get("260$a"));
×
395
    oclcMap.put("nameOfPublisher", resultMap.get("260$b"));
×
396
    oclcMap.put("sourceOfHeading", resultMap.get("650$2"));
×
397
    oclcMap.put("title", resultMap.get("245$a"));
×
398
  }
×
399

400
  private Object resolve(List<String> list, CodeList codeService) {
UNCOV
401
    if (list == null || list.isEmpty())
×
402
      return list;
×
403

UNCOV
404
    List<String> resolvedList = new ArrayList<>();
×
405
    for (String code : list)
×
406
      if (codeService.isValid(code))
×
407
        resolvedList.add(codeService.getCode(code).getLabel());
×
408
      else
UNCOV
409
        resolvedList.add(code);
×
410
    return resolvedList;
×
411
  }
412

413
  public Map<String, Object> getDuplumKeyMap() {
UNCOV
414
    if (duplumKeyMap == null) {
×
415
      createDuplumKeyMap();
×
416
    }
UNCOV
417
    return duplumKeyMap;
×
418
  }
419

420
  public void createDuplumKeyMap() {
UNCOV
421
    duplumKeyMap = new HashMap<>();
×
422
    duplumKeyMap.put("recordId", recordId);
×
423
    duplumKeyMap.put("titleWords", titleWords);
×
424
    duplumKeyMap.put("authorWords", authorWords);
×
425
    duplumKeyMap.put("duplumKeyType", duplumKeyType);
×
426
    duplumKeyMap.put("dateOfPublication", dateOfPublication);
×
427
    duplumKeyMap.put("isbn", isbn);
×
428
    duplumKeyMap.put("publisherOrDistributorNumber", publisherOrDistributorNumber);
×
429
    duplumKeyMap.put("abbreviatedNameOfPublisher", abbreviatedNameOfPublisher);
×
430
    duplumKeyMap.put("numberOfPart", numberOfPart);
×
431
    duplumKeyMap.put("nameOfPart", nameOfPart);
×
432
    duplumKeyMap.put("extent", extent);
×
433
    duplumKeyMap.put("musicalPresentationStatement", musicalPresentationStatement);
×
434
    duplumKeyMap.put("volumeDesignation", volumeDesignation);
×
435
    duplumKeyMap.put("relatedParts", relatedParts);
×
436
    duplumKeyMap.put("systemControlNumbers", systemControlNumbers);
×
437
    for (Map.Entry<String, Object> entry : oclcMap.entrySet()) {
×
438
      duplumKeyMap.put(entry.getKey(), entry.getValue());
×
439
    }
×
440
  }
×
441

442
  public boolean isValid() {
UNCOV
443
    return valid;
×
444
  }
445

446
  private void processSystemControlNumbers() {
UNCOV
447
    systemControlNumbers = new ArrayList<>();
×
448
    if (resultMap.get("035$a") != null) {
×
449
      for (String original : resultMap.get("035$a")) {
×
450
        systemControlNumbers.add(new X035aSystemControlNumber(original));
×
451
      }
×
452
    }
UNCOV
453
  }
×
454
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc