• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pkiraly / metadata-qa-marc / #1527

22 Aug 2025 02:21PM UTC coverage: 90.345%. Remained the same
#1527

push

pkiraly
Improve timeline handling

5191 of 6416 new or added lines in 219 files covered. (80.91%)

886 existing lines in 78 files now uncovered.

36717 of 40641 relevant lines covered (90.34%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.2
/src/main/java/de/gwdg/metadataqa/marc/cli/FunctionalAnalysis.java
1
package de.gwdg.metadataqa.marc.cli;
2

3
import de.gwdg.metadataqa.marc.analysis.functional.FrbrFunctionLister;
4
import de.gwdg.metadataqa.marc.analysis.functional.FunctionalAnalyzer;
5
import de.gwdg.metadataqa.marc.analysis.functional.Marc21FrbrFunctionLister;
6
import de.gwdg.metadataqa.marc.analysis.functional.Marc21FunctionalAnalyzer;
7
import de.gwdg.metadataqa.marc.analysis.functional.PicaFrbrFunctionLister;
8
import de.gwdg.metadataqa.marc.analysis.functional.PicaFunctionalAnalyzer;
9
import de.gwdg.metadataqa.marc.analysis.functional.UnimarcFrbrFunctionLister;
10
import de.gwdg.metadataqa.marc.analysis.functional.UnimarcFunctionalAnalyzer;
11
import de.gwdg.metadataqa.marc.cli.parameters.CompletenessParameters;
12
import de.gwdg.metadataqa.marc.cli.processor.BibliographicInputProcessor;
13
import de.gwdg.metadataqa.marc.cli.utils.RecordIterator;
14
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
15
import de.gwdg.metadataqa.marc.definition.FRBRFunction;
16
import de.gwdg.metadataqa.marc.model.validation.ValidationError;
17
import de.gwdg.metadataqa.marc.model.validation.ValidationErrorFormat;
18
import de.gwdg.metadataqa.marc.utils.Counter;
19
import de.gwdg.metadataqa.marc.utils.FunctionValue;
20
import org.apache.commons.cli.Options;
21
import org.apache.commons.cli.ParseException;
22
import org.apache.commons.lang3.StringUtils;
23
import org.marc4j.marc.Record;
24

25
import java.io.IOException;
26
import java.io.Serializable;
27
import java.nio.file.Files;
28
import java.nio.file.Path;
29
import java.nio.file.Paths;
30
import java.util.ArrayList;
31
import java.util.List;
32
import java.util.Map;
33
import java.util.logging.Level;
34
import java.util.logging.Logger;
35

36
import static de.gwdg.metadataqa.marc.Utils.createRow;
37

38
public class FunctionalAnalysis extends QACli<CompletenessParameters> implements BibliographicInputProcessor, Serializable {
39

40
  private static final Logger logger = Logger.getLogger(FunctionalAnalysis.class.getCanonicalName());
1✔
41

42
  private final Options options;
43
  private final boolean readyToProcess;
44

45

46
  private FunctionalAnalyzer analyzer;
47
  private int recordNumber;
48

49
  public FunctionalAnalysis(String[] args) throws ParseException {
1✔
50
    parameters = new CompletenessParameters(args);
1✔
51
    options = parameters.getOptions();
1✔
52
    readyToProcess = true;
1✔
53
  }
1✔
54

55
  public static void main(String[] args) {
56
    BibliographicInputProcessor processor = null;
1✔
57
    try {
58
      processor = new FunctionalAnalysis(args);
1✔
UNCOV
59
    } catch (ParseException e) {
×
60
      logger.log(Level.SEVERE, "FunctionalAnalysis", e);
×
61
      System.exit(1);
×
62
    }
1✔
63
    if (processor.getParameters().getArgs().length < 1) {
1✔
UNCOV
64
      logger.severe("Please provide a MARC file name!");
×
65
      processor.printHelp(processor.getParameters().getOptions());
×
66
      System.exit(0);
×
67
    }
68
    if (processor.getParameters().doHelp()) {
1✔
UNCOV
69
      processor.printHelp(processor.getParameters().getOptions());
×
UNCOV
70
      System.exit(0);
×
71
    }
72
    RecordIterator iterator = new RecordIterator(processor);
1✔
73
    iterator.setProcessWithErrors(processor.getParameters().getProcessRecordsWithoutId());
1✔
74
    iterator.start();
1✔
75
  }
1✔
76

77
  @Override
78
  public CompletenessParameters getParameters() {
79
    return parameters;
1✔
80
  }
81

82
  @Override
83
  public void processRecord(Record marc4jRecord, int recordNumber) throws IOException {
84
    // do nothing
85
  }
1✔
86

87
  @Override
88
  public void processRecord(BibliographicRecord bibliographicRecord, int recordNumber, List<ValidationError> errors) throws IOException {
NEW
89
    processRecord(bibliographicRecord, recordNumber);
×
UNCOV
90
  }
×
91

92
  @Override
93
  public void processRecord(BibliographicRecord bibliographicRecord, int recordNumber) throws IOException {
94
    if (parameters.getRecordIgnorator().isIgnorable(bibliographicRecord)) {
1✔
UNCOV
95
      return;
×
96
    }
97

98
    this.recordNumber = recordNumber;
1✔
99

100
    analyzer.consumeRecord(bibliographicRecord);
1✔
101
  }
1✔
102

103

104
  @Override
105
  public void beforeIteration() {
106
    // Determine the analyzer to be used
107
    if (parameters.isMarc21()) {
1✔
108
      FrbrFunctionLister marc21FrbrFunctionLister = new Marc21FrbrFunctionLister(parameters.getMarcVersion());
1✔
109
      analyzer = new Marc21FunctionalAnalyzer(marc21FrbrFunctionLister);
1✔
110
    } else if (parameters.isPica()) {
1✔
111
      FrbrFunctionLister picaFrbrFunctionLister = new PicaFrbrFunctionLister();
1✔
112
      analyzer = new PicaFunctionalAnalyzer(picaFrbrFunctionLister);
1✔
113
    } else if (parameters.isUnimarc()) {
1✔
114
      FrbrFunctionLister unimarcFrbrFunctionLister = new UnimarcFrbrFunctionLister();
1✔
115
      analyzer = new UnimarcFunctionalAnalyzer(unimarcFrbrFunctionLister);
1✔
116
    } else {
1✔
NEW
117
      throw new IllegalArgumentException("Unknown MARC format");
×
118
    }
119

120
    logger.info(() -> analyzer.getFrbrFunctionLister().getBaselineCounterMap().toString());
1✔
121
  }
1✔
122

123
  @Override
124
  public void fileOpened(Path path) {
125
    // do nothing
126
  }
1✔
127

128
  @Override
129
  public void fileProcessed() {
130
    // do nothing
UNCOV
131
  }
×
132

133
  @Override
134
  public void afterIteration(int numberOfprocessedRecords, long duration) {
135
    String fileExtension = ".csv";
1✔
136
    final char separator = getSeparator(parameters.getFormat());
1✔
137
    if (parameters.getFormat().equals(ValidationErrorFormat.TAB_SEPARATED)) {
1✔
UNCOV
138
      fileExtension = ".tsv";
×
139
    }
140

141
    Map<FRBRFunction, List<Double>> result = analyzer.percentOf(recordNumber);
1✔
142
    saveResult(result, fileExtension, separator);
1✔
143

144
    Map<FRBRFunction, Counter<FunctionValue>> percentHistogram = analyzer.getHistogram();
1✔
145
    saveHistogram(percentHistogram, fileExtension, separator);
1✔
146

147
    saveMapping(fileExtension, separator);
1✔
148
    saveParameters("functions.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
1✔
149
  }
1✔
150

151
  private void saveMapping(String fileExtension,
152
                           char separator) {
153
    Map<FRBRFunction, List<String>> functions;
154
    functions = analyzer.getFrbrFunctionLister().getPathByFunction();
1✔
155

156
    var path = Paths.get(parameters.getOutputDir(), "functional-analysis-mapping" + fileExtension);
1✔
157
    try (var writer = Files.newBufferedWriter(path)) {
1✔
158
      writer.write("frbrfunction" + separator + "count" + separator + "fields\n");
1✔
159
      for (FRBRFunction function : FRBRFunction.values()) {
1✔
160
        if (function.getParent() != null) {
1✔
161
          List<String> paths = functions != null ? functions.getOrDefault(function, new ArrayList<>()) : new ArrayList<>();
1✔
162
          List<Object> cells = new ArrayList<>();
1✔
163
          cells.add(function.toString());
1✔
164
          cells.add(paths.size());
1✔
165
          cells.add(StringUtils.join(paths, ";"));
1✔
166
          writer.write(createRow(cells));
1✔
167
        }
168
      }
169
    } catch (IOException e) {
×
UNCOV
170
      logger.log(Level.SEVERE, "afterIteration", e);
×
171
    }
1✔
172
  }
1✔
173

174
  private void saveHistogram(Map<FRBRFunction, Counter<FunctionValue>> histogram,
175
                             String fileExtension,
176
                             char separator) {
177
    logger.info("Functional analysis histogram");
1✔
178
    var path = Paths.get(
1✔
179
      parameters.getOutputDir(),
1✔
180
      "functional-analysis-histogram" + fileExtension
181
    );
182
    try (var writer = Files.newBufferedWriter(path)) {
1✔
183
      writer.write("frbrfunction" + separator + "functioncount" + separator + "score" + separator + "count\n");
1✔
184
      histogram
1✔
185
        .entrySet()
1✔
186
        .stream()
1✔
187
        .forEach(entry -> {
1✔
188
          String function = entry.getKey().name();
1✔
189
          Map<FunctionValue, Integer> histogramOfFunction = entry.getValue().getMap();
1✔
190
          histogramOfFunction
1✔
191
            .keySet()
1✔
192
            .stream()
1✔
193
            .sorted((a, b) -> ((Integer) a.getCount()).compareTo(b.getCount()))
1✔
194
            .forEach(functionValue -> {
1✔
195
              Integer count = histogramOfFunction.get(functionValue);
1✔
196
              try {
197
                writer.write(createRow(function, functionValue.getCount(), functionValue.getPercentage(), count));
1✔
UNCOV
198
              } catch (IOException e) {
×
UNCOV
199
                logger.log(Level.SEVERE, "saveHistogram", e);
×
200
              }
1✔
201
            });
1✔
202
        });
1✔
UNCOV
203
    } catch (IOException e) {
×
UNCOV
204
      logger.log(Level.SEVERE, "saveHistogram", e);
×
205
    }
1✔
206
  }
1✔
207

208
  private void saveResult(Map<FRBRFunction, List<Double>> result,
209
                          String fileExtension,
210
                          char separator) {
211

212
    logger.info("Saving functional analysis");
1✔
213
    var path = Paths.get(parameters.getOutputDir(), "functional-analysis" + fileExtension);
1✔
214
    try (var writer = Files.newBufferedWriter(path)) {
1✔
215
      writer.write("frbr-function" + separator + "avgcount" + separator + "avgscore\n");
1✔
216
      result
1✔
217
        .entrySet()
1✔
218
        .stream()
1✔
219
        .forEach(entry -> {
1✔
220
          try {
221
            List<Double> values = entry.getValue();
1✔
222
            writer.write(createRow(entry.getKey().name(), values.get(0), values.get(1)));
1✔
UNCOV
223
          } catch (IOException e) {
×
UNCOV
224
            logger.log(Level.SEVERE, "saveResult", e);
×
225
          }
1✔
226
        });
1✔
UNCOV
227
    } catch (IOException e) {
×
UNCOV
228
      logger.log(Level.SEVERE, "saveResult", e);
×
229
    }
1✔
230
  }
1✔
231

232
  // TODO: move it in a common class
233
  private char getSeparator(ValidationErrorFormat format) {
234
    if (format.equals(ValidationErrorFormat.TAB_SEPARATED)) {
1✔
UNCOV
235
      return '\t';
×
236
    } else {
237
      return ',';
1✔
238
    }
239
  }
240

241
  @Override
242
  public void printHelp(Options options) {
243
    // do nothing
UNCOV
244
  }
×
245

246
  @Override
247
  public boolean readyToProcess() {
248
    return readyToProcess;
1✔
249
  }
250

251
  public FunctionalAnalyzer getAnalyzer() {
252
    return analyzer;
1✔
253
  }
254

255
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc