• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pkiraly / metadata-qa-marc / #1527

22 Aug 2025 02:21PM UTC coverage: 90.345%. Remained the same
#1527

push

pkiraly
Improve timeline handling

5191 of 6416 new or added lines in 219 files covered. (80.91%)

886 existing lines in 78 files now uncovered.

36717 of 40641 relevant lines covered (90.34%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.47
/src/main/java/de/gwdg/metadataqa/marc/cli/TranslationAnalysis.java
1
package de.gwdg.metadataqa.marc.cli;
2

3
import com.fasterxml.jackson.databind.ObjectMapper;
4
import de.gwdg.metadataqa.api.configuration.SchemaConfiguration;
5
import de.gwdg.metadataqa.api.interfaces.MetricResult;
6
import de.gwdg.metadataqa.api.model.XmlFieldInstance;
7
import de.gwdg.metadataqa.api.rule.RuleCatalog;
8
import de.gwdg.metadataqa.api.rule.RuleCheckerOutput;
9
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputStatus;
10
import de.gwdg.metadataqa.marc.CsvUtils;
11
import de.gwdg.metadataqa.marc.RuleCatalogUtils;
12
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
13
import de.gwdg.metadataqa.marc.cli.parameters.TranslationParameters;
14
import de.gwdg.metadataqa.marc.cli.processor.BibliographicInputProcessor;
15
import de.gwdg.metadataqa.marc.cli.utils.BibSelector;
16
import de.gwdg.metadataqa.marc.cli.utils.BibSelectorFactory;
17
import de.gwdg.metadataqa.marc.cli.utils.RecordIterator;
18
import de.gwdg.metadataqa.marc.cli.utils.ShaclUtils;
19
import de.gwdg.metadataqa.marc.cli.utils.TranslationModel;
20
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordFilter;
21
import de.gwdg.metadataqa.marc.cli.utils.placename.PlaceNameNormaliser;
22
import de.gwdg.metadataqa.marc.cli.utils.translation.ContributorNormaliser;
23
import de.gwdg.metadataqa.marc.cli.utils.translation.PublicationYearNormaliser;
24
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
25
import de.gwdg.metadataqa.marc.model.validation.ValidationError;
26
import org.apache.commons.cli.Options;
27
import org.apache.commons.cli.ParseException;
28
import org.marc4j.marc.Record;
29

30
import java.io.File;
31
import java.io.IOException;
32
import java.io.Serializable;
33
import java.nio.file.Files;
34
import java.nio.file.Path;
35
import java.util.HashMap;
36
import java.util.List;
37
import java.util.Map;
38
import java.util.logging.Level;
39
import java.util.logging.Logger;
40
import java.util.stream.Collectors;
41

42
public class TranslationAnalysis extends QACli<TranslationParameters>
43
                                 implements BibliographicInputProcessor, Serializable {
44

45
  private static final Logger logger = Logger.getLogger(TranslationAnalysis.class.getCanonicalName());
1✔
46

47
  private boolean readyToProcess;
48
  private File outputFile;
49
  private RuleCatalog ruleCatalog;
50
  private SchemaConfiguration schema;
51
  private final RecordFilter recordFilter;
52
  private Map<String, Long> failed;
53
  private Map<String, String> rulePathMap;
54
  private Map<String, File> debugFiles;
55
  private PlaceNameNormaliser placeNameNormaliser;
56
  private ObjectMapper mapper;
57
  private File exportFile;
58
  private PublicationYearNormaliser publicationYearNormaliser;
59
  private ContributorNormaliser contributorNormaliser;
60

61
  /**
   * Builds the analysis from raw command line arguments.
   *
   * @param args the command line arguments handed to {@link TranslationParameters}
   * @throws ParseException declared for failures while parsing the arguments
   */
  public TranslationAnalysis(String[] args) throws ParseException {
    this.parameters = new TranslationParameters(args);
    this.recordFilter = this.parameters.getRecordFilter();
    this.readyToProcess = true;
  }
1✔
66

67
  public static void main(String[] args) {
NEW
68
    BibliographicInputProcessor processor = null;
×
69
    try {
NEW
70
      processor = new TranslationAnalysis(args);
×
NEW
71
    } catch (ParseException e) {
×
NEW
72
      System.err.println("ERROR. " + e.getLocalizedMessage());
×
NEW
73
      System.exit(1);
×
NEW
74
    }
×
NEW
75
    if (processor.getParameters().getArgs().length < 1) {
×
NEW
76
      System.err.println("Please provide a MARC file name!");
×
NEW
77
      processor.printHelp(processor.getParameters().getOptions());
×
NEW
78
      System.exit(0);
×
79
    }
NEW
80
    if (processor.getParameters().doHelp()) {
×
NEW
81
      processor.printHelp(processor.getParameters().getOptions());
×
NEW
82
      System.exit(0);
×
83
    }
NEW
84
    RecordIterator iterator = new RecordIterator(processor);
×
NEW
85
    iterator.setProcessWithErrors(processor.getParameters().getProcessRecordsWithoutId());
×
NEW
86
    iterator.start();
×
NEW
87
  }
×
88

89
  @Override
90
  public CommonParameters getParameters() {
91
    return parameters;
1✔
92
  }
93

94
  @Override
95
  public void beforeIteration() {
96
    logger.info(parameters.formatParameters());
1✔
97
    outputFile = new File(parameters.getOutputDir(), parameters.getShaclOutputFile());
1✔
98

99
    schema = ShaclUtils.setupSchema(parameters.getTranslationConfigurationFile());
1✔
100
    ruleCatalog = ShaclUtils.setupRuleCatalog(schema, parameters);
1✔
101
    rulePathMap = ShaclUtils.createRulePathMap(schema);
1✔
102
    failed = new HashMap<>();
1✔
103
    if (parameters.getDebugFailedRules() != null && !parameters.getDebugFailedRules().isEmpty()) {
1✔
104
      debugFiles = new HashMap<>();
1✔
105
      for (String ruleId : parameters.getDebugFailedRules()) {
1✔
106
        debugFiles.put(ruleId, new File(parameters.getOutputDir(), String.format("translations-deubg-%s.txt", ruleId)));
1✔
107
      }
1✔
108
    }
109

110
    if (outputFile.exists()) {
1✔
111
      try {
NEW
112
        Files.delete(outputFile.toPath());
×
NEW
113
      } catch (IOException e) {
×
NEW
114
        logger.log(Level.SEVERE, "The output file ({}) has not been deleted", outputFile.getAbsolutePath());
×
NEW
115
      }
×
116
    }
117
    List<String> header = ruleCatalog.getHeader();
1✔
118
    header.add(0, "id");
1✔
119
    header.addAll(TranslationModel.header());
1✔
120
    printToFile(outputFile, CsvUtils.createCsv(header));
1✔
121

122
    if (parameters.getTranslationPlaceNameDictionaryDir() != null)
1✔
123
      placeNameNormaliser = new PlaceNameNormaliser(
1✔
124
        parameters.getTranslationPlaceNameDictionaryDir(),
1✔
125
        parameters.getOutputDir()
1✔
126
      );
127

128
    if (parameters.getTranslationExport() != null) {
1✔
129
      exportFile = new File(parameters.getOutputDir(), parameters.getTranslationExport());
1✔
130
      if (exportFile.exists())
1✔
131
        exportFile.delete();
1✔
132
      mapper = new ObjectMapper();
1✔
133
    }
134

135
    publicationYearNormaliser = new PublicationYearNormaliser(parameters.getOutputDir());
1✔
136
    contributorNormaliser = new ContributorNormaliser(
1✔
137
      parameters.getOutputDir(),
1✔
138
      schema.asSchema().getPathByLabel("245$c").getRules().get(0).getMqafPattern().getCompiledPattern()
1✔
139
    );
140
  }
1✔
141

142
  @Override
143
  public void fileOpened(Path path) {
144
    logger.log(Level.INFO, "file opened: {0}", new Object[]{path});
1✔
145
  }
1✔
146

147
  @Override
148
  public void processRecord(Record marc4jRecord, int recordNumber) throws IOException {
149
    // do nothing
150
  }
1✔
151

152
  @Override
153
  public void processRecord(BibliographicRecord bibliographicRecord,
154
                            int recordNumber,
155
                            List<ValidationError> errors) throws IOException {
NEW
156
    processRecord(bibliographicRecord, recordNumber);
×
NEW
157
  }
×
158

159
  @Override
160
  public void processRecord(BibliographicRecord bibliographicRecord,
161
                            int recordNumber)
162
      throws IOException {
163
    if (!recordFilter.isAllowable(bibliographicRecord)) {
1✔
164
      // logger.info("ignoring " + bibliographicRecord.getId());
NEW
165
      return;
×
166
    }
167

168
    BibSelector selector = BibSelectorFactory.create(schema.getFormat(), bibliographicRecord);
1✔
169

170
    if (selector != null) {
1✔
171
      List<MetricResult> results = ruleCatalog.measure(selector);
1✔
172
      Map<String, RuleCheckerOutput> resultMap = (Map<String, RuleCheckerOutput>) results.get(0).getResultMap();
1✔
173
      TranslationModel model = new TranslationModel(resultMap, selector, placeNameNormaliser, publicationYearNormaliser, contributorNormaliser, parameters.getMarcVersion());
1✔
174

175
      List<String> debugIds = parameters.getDebugFailedRules();
1✔
176
      if (debugIds != null && !debugIds.isEmpty()) {
1✔
177
        for (String debugId : debugIds) {
1✔
178
          if (resultMap.get(debugId).getStatus().equals(RuleCheckingOutputStatus.FAILED)) {
1✔
179
            failed.computeIfAbsent(debugId, k -> 0L);
1✔
180
            failed.put(debugId, failed.get(debugId) + 1);
1✔
181
            List<XmlFieldInstance> values = selector.get(rulePathMap.get(debugId));
1✔
182
            printToFile(debugFiles.get(debugId), values.get(0).getValue() + "\n");
1✔
183
          }
184
        }
1✔
185
      }
186

187
      List<Object> values = RuleCatalogUtils.extract(ruleCatalog, results);
1✔
188
      values.add(0, bibliographicRecord.getId(true));
1✔
189
      values.addAll(model.values());
1✔
190

191
      printToFile(outputFile, CsvUtils.createCsvFromObjects(values));
1✔
192

193
      if (model.isTranslation()) {
1✔
194
        Map<String, Object> extracted = model.extract();
1✔
195
        extracted.put("id", bibliographicRecord.getId());
1✔
196
        if (exportFile != null && mapper != null)
1✔
197
          printToFile(exportFile, mapper.writeValueAsString(extracted) + "\n");
1✔
198
      }
199
    }
200
  }
1✔
201

202
  @Override
203
  public void fileProcessed() {
204
    // do nothing
NEW
205
  }
×
206

207
  @Override
208
  public void afterIteration(int numberOfprocessedRecords, long duration) {
209
    String report = failed.entrySet().stream()
1✔
210
        .map(e ->
1✔
211
          String.format(
1✔
212
            "%s: %d failures (%.2f%% of records)",
213
            e.getKey(), e.getValue(), e.getValue() * 100.0 / numberOfprocessedRecords
1✔
214
          ))
215
          .collect(Collectors.joining(", "));
1✔
216
    if (!report.isEmpty())
1✔
217
      logger.log(Level.WARNING, "failed rules: {0}", report);
1✔
218
    copyFileToOutputDir(parameters.getTranslationConfigurationFile());
1✔
219
    saveParameters("translation-analysis.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
1✔
220

221
    placeNameNormaliser.reportUnresolvedPlaceNames();
1✔
222
    publicationYearNormaliser.reportUnresolvedYears();
1✔
223
    publicationYearNormaliser.reportPatterns();
1✔
224
    contributorNormaliser.reportUnresolvedContributors();
1✔
225
  }
1✔
226

227
  @Override
228
  public void printHelp(Options options) {
229

NEW
230
  }
×
231

232
  @Override
233
  public boolean readyToProcess() {
234
    return readyToProcess;
1✔
235
  }
236
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc