• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pkiraly / metadata-qa-marc / #1527

22 Aug 2025 02:21PM UTC coverage: 90.345%. Remained the same
#1527

push

pkiraly
Improve timeline handling

5191 of 6416 new or added lines in 219 files covered. (80.91%)

886 existing lines in 78 files now uncovered.

36717 of 40641 relevant lines covered (90.34%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.33
/src/main/java/de/gwdg/metadataqa/marc/cli/AuthorityAnalysis.java
1
package de.gwdg.metadataqa.marc.cli;
2

3
import de.gwdg.metadataqa.marc.Utils;
4
import de.gwdg.metadataqa.marc.analysis.contextual.authority.AuthorityAnalyzer;
5
import de.gwdg.metadataqa.marc.analysis.contextual.authority.AuthorityCategory;
6
import de.gwdg.metadataqa.marc.analysis.contextual.authority.AuthorityStatistics;
7
import de.gwdg.metadataqa.marc.analysis.contextual.authority.Marc21AuthorityAnalyzer;
8
import de.gwdg.metadataqa.marc.analysis.contextual.authority.PicaAuthorityAnalyzer;
9
import de.gwdg.metadataqa.marc.analysis.contextual.authority.UnimarcAuthorityAnalyzer;
10
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
11
import de.gwdg.metadataqa.marc.cli.parameters.ValidatorParameters;
12
import de.gwdg.metadataqa.marc.cli.processor.BibliographicInputProcessor;
13
import de.gwdg.metadataqa.marc.cli.utils.RecordIterator;
14
import de.gwdg.metadataqa.marc.cli.utils.Schema;
15
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
16
import de.gwdg.metadataqa.marc.model.validation.ValidationError;
17
import org.apache.commons.cli.Options;
18
import org.apache.commons.cli.ParseException;
19
import org.apache.commons.lang3.StringUtils;
20
import org.marc4j.marc.Record;
21

22
import java.io.BufferedWriter;
23
import java.io.IOException;
24
import java.io.Serializable;
25
import java.nio.file.Files;
26
import java.nio.file.Path;
27
import java.nio.file.Paths;
28
import java.util.Arrays;
29
import java.util.HashMap;
30
import java.util.List;
31
import java.util.Map;
32
import java.util.logging.Level;
33
import java.util.logging.Logger;
34

35
import static de.gwdg.metadataqa.marc.Utils.count;
36
import static de.gwdg.metadataqa.marc.Utils.quote;
37

38
public class AuthorityAnalysis extends QACli<ValidatorParameters> implements BibliographicInputProcessor, Serializable {
39

40
  private static final Logger logger = Logger.getLogger(AuthorityAnalysis.class.getCanonicalName());
1✔
41

42
  private Map<Integer, Integer> histogram = new HashMap<>();
1✔
43
  private Map<Integer, String> frequencyExamples = new HashMap<>();
1✔
44
  private Map<Boolean, Integer> hasClassifications = new HashMap<>();
1✔
45
  private boolean readyToProcess;
46
  private static char separator = ',';
1✔
47
  AuthorityStatistics statistics = new AuthorityStatistics();
1✔
48

49
  public AuthorityAnalysis(String[] args) throws ParseException {
1✔
50
    parameters = new ValidatorParameters(args);
1✔
51
    readyToProcess = true;
1✔
52
    Schema.resetIdCounter();
1✔
53
  }
1✔
54

55
  public static void main(String[] args) {
56
    BibliographicInputProcessor processor = null;
1✔
57
    try {
58
      processor = new AuthorityAnalysis(args);
1✔
UNCOV
59
    } catch (ParseException e) {
×
UNCOV
60
      System.err.println(createRow("ERROR. ", e.getLocalizedMessage()));
×
UNCOV
61
      System.exit(1);
×
62
    }
1✔
63
    if (processor.getParameters().getArgs().length < 1) {
1✔
64
      System.err.println("Please provide a MARC file name!");
×
65
      processor.printHelp(processor.getParameters().getOptions());
×
UNCOV
66
      System.exit(0);
×
67
    }
68
    if (processor.getParameters().doHelp()) {
1✔
69
      processor.printHelp(processor.getParameters().getOptions());
×
70
      System.exit(0);
×
71
    }
72
    var iterator = new RecordIterator(processor);
1✔
73
    iterator.setProcessWithErrors(processor.getParameters().getProcessRecordsWithoutId());
1✔
74
    iterator.start();
1✔
75
  }
1✔
76

77
  @Override
78
  public CommonParameters getParameters() {
79
    return parameters;
1✔
80
  }
81

82
  @Override
83
  public void processRecord(Record marc4jRecord, int recordNumber) throws IOException {
84
    // do nothing
85
  }
1✔
86

87
  @Override
88
  public void processRecord(BibliographicRecord bibliographicRecord, int recordNumber, List<ValidationError> errors) throws IOException {
NEW
89
    processRecord(bibliographicRecord, recordNumber);
×
UNCOV
90
  }
×
91

92
  @Override
93
  public void processRecord(BibliographicRecord marcRecord, int recordNumber) throws IOException {
94
    if (parameters.getRecordIgnorator().isIgnorable(marcRecord))
1✔
95
      return;
×
96

97
    // Depending on the type of record, create the appropriate analyzer
98
    AuthorityAnalyzer analyzer;
99
    if (parameters.isMarc21()) {
1✔
NEW
100
      analyzer = new Marc21AuthorityAnalyzer(marcRecord, statistics);
×
101
    } else if (parameters.isPica()) {
1✔
102
      analyzer = new PicaAuthorityAnalyzer(marcRecord, statistics);
1✔
103
    } else if (parameters.isUnimarc()) {
1✔
104
      analyzer = new UnimarcAuthorityAnalyzer(marcRecord, statistics);
1✔
105
    } else {
NEW
106
      logger.log(Level.SEVERE, "Unhandled schema type: {0}", new Object[]{parameters.getSchemaType()});
×
NEW
107
      return;
×
108
    }
109
    int count = analyzer.process();
1✔
110
    count((count > 0), hasClassifications);
1✔
111
    count(count, histogram);
1✔
112

113
    frequencyExamples.computeIfAbsent(count, s -> marcRecord.getId(true));
1✔
114
  }
1✔
115

116
  @Override
117
  public void beforeIteration() {
118
    // do nothing
119
  }
1✔
120

121
  @Override
122
  public void fileOpened(Path path) {
123
    // do nothing
124
  }
1✔
125

126
  @Override
127
  public void fileProcessed() {
128
    // do nothing
UNCOV
129
  }
×
130

131
  @Override
132
  public void afterIteration(int numberOfprocessedRecords, long duration) {
133
    printAuthoritiesByCategories();
1✔
134
    printAuthoritiesBySchema();
1✔
135
    printAuthoritiesByRecords();
1✔
136
    printAuthoritiesHistogram();
1✔
137
    printFrequencyExamples();
1✔
138
    printAuthoritiesSubfieldsStatistics();
1✔
139
    saveParameters("authorities.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
1✔
140
  }
1✔
141

142
  private void printAuthoritiesByCategories() {
143
    var path = Paths.get(parameters.getOutputDir(), "authorities-by-categories.csv");
1✔
144
    try (var writer = Files.newBufferedWriter(path)) {
1✔
145
      writer.write(createRow("category", "recordcount", "instancecount"));
1✔
146
      statistics.getRecordsPerCategories()
1✔
147
        .entrySet()
1✔
148
        .stream()
1✔
149
        .sorted((e1, e2) -> Integer.compare(e1.getKey().getId(), e2.getKey().getId()))
1✔
150
        .forEach(
1✔
151
          entry -> {
152
            AuthorityCategory category = entry.getKey();
1✔
153
            int recordCount = entry.getValue();
1✔
154
            int instanceCount = statistics.getInstancesPerCategories().get(category);
1✔
155
            try {
156
              writer.write(createRow(
1✔
157
                quote(category.getLabel()),
1✔
158
                recordCount,
1✔
159
                instanceCount
1✔
160
              ));
UNCOV
161
            } catch (IOException | NullPointerException ex) {
×
UNCOV
162
              logger.log(Level.SEVERE, "build", ex);
×
UNCOV
163
              logger.severe(category.toString());
×
164
            }
1✔
165
          }
1✔
166
        );
UNCOV
167
    } catch (IOException e) {
×
UNCOV
168
      logger.log(Level.SEVERE, "printAuthoritiesByCategories", e);
×
169
    }
1✔
170
  }
1✔
171

172
  private void printAuthoritiesBySchema() {
173
    var path = Paths.get(parameters.getOutputDir(), "authorities-by-schema.csv");
1✔
174
    try (var writer = Files.newBufferedWriter(path)) {
1✔
175
      writer.write(createRow("id", "field", "location", "scheme", "abbreviation", "abbreviation4solr", "recordcount", "instancecount"));
1✔
176
      statistics.getInstances()
1✔
177
        .entrySet()
1✔
178
        .stream()
1✔
179
        .sorted((e1, e2) -> {
1✔
180
            int i = e1.getKey().getField().compareTo(e2.getKey().getField());
1✔
181
            if (i != 0)
1✔
182
              return i;
1✔
183
            else {
184
              i = e1.getKey().getLocation().compareTo(e2.getKey().getLocation());
1✔
185
              if (i != 0)
1✔
UNCOV
186
                return i;
×
187
              else
188
                return e2.getValue().compareTo(e1.getValue());
1✔
189
            }
190
          }
191
        )
192
        .forEach(
1✔
193
          entry -> printSingleClassificationBySchema(writer, entry)
1✔
194
        );
UNCOV
195
    } catch (IOException e) {
×
UNCOV
196
      logger.log(Level.SEVERE, "printAuthoritiesBySchema", e);
×
197
    }
1✔
198
  }
1✔
199

200
  private void printSingleClassificationBySchema(BufferedWriter writer, Map.Entry<Schema, Integer> entry) {
201
    Schema schema = entry.getKey();
1✔
202
    int instanceCount = entry.getValue();
1✔
203
    int recordCount = statistics.getRecords().get(schema);
1✔
204
    try {
205
      writer.write(createRow(
1✔
206
        schema.getId(),
1✔
207
        schema.getField(),
1✔
208
        schema.getLocation(),
1✔
209
        '"' + schema.getSchema().replace("\"", "\\\"") + '"',
1✔
210
        schema.getAbbreviation(),
1✔
211
        Utils.solarize(schema.getAbbreviation()),
1✔
212
        recordCount,
1✔
213
        instanceCount
1✔
214
      ));
UNCOV
215
    } catch (IOException | NullPointerException e) {
×
UNCOV
216
      logger.log(Level.SEVERE, "printSingleClassificationBySchema", e);
×
UNCOV
217
      System.err.println(schema);
×
218
    }
1✔
219
  }
1✔
220

221
  private void printAuthoritiesByRecords() {
222
    Path path;
223
    path = Paths.get(parameters.getOutputDir(), "authorities-by-records.csv");
1✔
224
    try (var writer = Files.newBufferedWriter(path)) {
1✔
225
      writer.write(createRow("records-with-authorities", "count"));
1✔
226
      hasClassifications
1✔
227
        .entrySet()
1✔
228
        .stream()
1✔
229
        .sorted((e1, e2) ->
1✔
230
          e2.getValue().compareTo(e1.getValue()))
1✔
231
        .forEach(
1✔
232
          e -> {
233
            try {
234
              writer.write(createRow(e.getKey().toString(), e.getValue()));
1✔
UNCOV
235
            } catch (IOException ex) {
×
UNCOV
236
              logger.log(Level.SEVERE, "printAuthoritiesByRecords", ex);
×
237
            }
1✔
238
          }
1✔
239
        );
UNCOV
240
    } catch (IOException e) {
×
UNCOV
241
      logger.log(Level.SEVERE, "printAuthoritiesByRecords", e);
×
242
    }
1✔
243
  }
1✔
244

245
  private void printAuthoritiesHistogram() {
246
    var path = Paths.get(parameters.getOutputDir(), "authorities-histogram.csv");
1✔
247
    try (var writer = Files.newBufferedWriter(path)) {
1✔
248
      writer.write(createRow("count", "frequency"));
1✔
249
      histogram
1✔
250
        .entrySet()
1✔
251
        .stream()
1✔
252
        .sorted((e1, e2) -> e1.getKey().compareTo(e2.getKey()))
1✔
253
        .forEach(
1✔
254
          entry -> {
255
            try {
256
              writer.write(createRow(entry.getKey(), entry.getValue()));
1✔
257
            } catch (IOException e) {
×
258
              logger.log(Level.SEVERE, "printAuthoritiesHistogram", e);
×
259
            }
1✔
260
          }
1✔
261
        );
UNCOV
262
    } catch (IOException e) {
×
UNCOV
263
      logger.log(Level.SEVERE, "printAuthoritiesHistogram", e);
×
264
    }
1✔
265
  }
1✔
266

267
  private void printFrequencyExamples() {
268
    var path = Paths.get(parameters.getOutputDir(), "authorities-frequency-examples.csv");
1✔
269
    try (var writer = Files.newBufferedWriter(path)) {
1✔
270
      writer.write(createRow("count", "id"));
1✔
271
      frequencyExamples
1✔
272
        .entrySet()
1✔
273
        .stream()
1✔
274
        .sorted((e1, e2) -> e1.getKey().compareTo(e2.getKey()))
1✔
275
        .forEach(
1✔
276
          entry -> {
277
            try {
278
              writer.write(createRow(entry.getKey(), entry.getValue()));
1✔
279
            } catch (IOException e) {
×
280
              logger.log(Level.SEVERE, "printFrequencyExamples", e);
×
281
            }
1✔
282
          }
1✔
283
        );
UNCOV
284
    } catch (IOException e) {
×
UNCOV
285
      logger.log(Level.SEVERE, "printFrequencyExamples", e);
×
286
    }
1✔
287
  }
1✔
288

289
  private void printAuthoritiesSubfieldsStatistics() {
290
    var path = Paths.get(parameters.getOutputDir(), "authorities-by-schema-subfields.csv");
1✔
291
    try (var writer = Files.newBufferedWriter(path)) {
1✔
292
      // final List<String> header = Arrays.asList("field", "location", "label", "abbreviation", "subfields", "scount");
293
      final List<String> header = Arrays.asList("id", "subfields", "count");
1✔
294
      writer.write(createRow(header));
1✔
295
      statistics.getSubfields()
1✔
296
        .entrySet()
1✔
297
        .stream()
1✔
298
        .sorted((e1, e2) ->
1✔
299
          e1.getKey().getField().compareTo(e2.getKey().getField()))
1✔
300
        .forEach(
1✔
301
          schemaEntry -> printSingleSchemaSubfieldsStatistics(writer, schemaEntry)
1✔
302
        );
UNCOV
303
    } catch (IOException e) {
×
UNCOV
304
      logger.log(Level.SEVERE, "printAuthoritiesSubfieldsStatistics", e);
×
305
    }
1✔
306
  }
1✔
307

308
  private void printSingleSchemaSubfieldsStatistics(BufferedWriter writer, Map.Entry<Schema, Map<List<String>, Integer>> schemaEntry) {
309
    Schema schema = schemaEntry.getKey();
1✔
310
    Map<List<String>, Integer> val = schemaEntry.getValue();
1✔
311
    val
1✔
312
      .entrySet()
1✔
313
      .stream()
1✔
314
      .sorted((count1, count2) -> count2.getValue().compareTo(count1.getValue()))
1✔
315
      .forEach(
1✔
316
        countEntry -> {
317
          List<String> subfields = countEntry.getKey();
1✔
318
          int count = countEntry.getValue();
1✔
319
          try {
320
            writer.write(createRow(
1✔
321
              schema.getId(),
1✔
322
              // schema.field,
323
              // schema.location,
324
              // '"' + schema.schema.replace("\"", "\\\"") + '"',
325
              // schema.abbreviation,
326
              StringUtils.join(subfields, ';'),
1✔
327
              count
1✔
328
            ));
UNCOV
329
          } catch (IOException ex) {
×
UNCOV
330
            logger.log(Level.SEVERE, "printSingleSchemaSubfieldsStatistics", ex);
×
331
          }
1✔
332
        }
1✔
333
      );
334
  }
1✔
335

336
  private static String createRow(List<String> fields) {
337
    return StringUtils.join(fields, separator) + "\n";
1✔
338
  }
339

340
  private static String createRow(Object... fields) {
341
    return StringUtils.join(fields, separator) + "\n";
1✔
342
  }
343

344
  @Override
345
  public void printHelp(Options options) {
346
    // do nothing
347
  }
×
348

349
  @Override
350
  public boolean readyToProcess() {
351
    return readyToProcess;
1✔
352
  }
353
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc