• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pkiraly / metadata-qa-marc / #1527

22 Aug 2025 02:21PM UTC coverage: 90.345%. Remained the same
#1527

push

pkiraly
Improve timeline handling

5191 of 6416 new or added lines in 219 files covered. (80.91%)

886 existing lines in 78 files now uncovered.

36717 of 40641 relevant lines covered (90.34%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

59.89
/src/main/java/de/gwdg/metadataqa/marc/cli/utils/RecordIterator.java
1
package de.gwdg.metadataqa.marc.cli.utils;
2

3
import de.gwdg.metadataqa.marc.MarcFactory;
4
import de.gwdg.metadataqa.marc.Utils;
5
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
6
import de.gwdg.metadataqa.marc.cli.processor.BibliographicInputProcessor;
7
import de.gwdg.metadataqa.marc.dao.MarcLeader;
8
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
9
import de.gwdg.metadataqa.marc.definition.DataSource;
10
import de.gwdg.metadataqa.marc.definition.MarcVersion;
11
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
12
import de.gwdg.metadataqa.marc.utils.QAMarcReaderFactory;
13
import de.gwdg.metadataqa.marc.utils.marcreader.AlephseqMarcReader;
14
import de.gwdg.metadataqa.marc.utils.marcreader.ErrorAwareReader;
15
import de.gwdg.metadataqa.marc.utils.pica.PicaSchemaManager;
16
import de.gwdg.metadataqa.marc.utils.pica.PicaSchemaReader;
17
import de.gwdg.metadataqa.marc.utils.unimarc.UnimarcSchemaManager;
18
import de.gwdg.metadataqa.marc.utils.unimarc.UnimarcSchemaReader;
19
import org.apache.commons.cli.HelpFormatter;
20
import org.apache.commons.cli.Options;
21
import org.apache.solr.client.solrj.SolrServerException;
22
import org.marc4j.MarcException;
23
import org.marc4j.MarcReader;
24
import org.marc4j.marc.Record;
25
import org.marc4j.marc.VariableField;
26
import org.marc4j.marc.impl.ControlFieldImpl;
27

28
import java.io.FileInputStream;
29
import java.nio.file.Path;
30
import java.nio.file.Paths;
31
import java.text.DecimalFormat;
32
import java.util.logging.Level;
33
import java.util.logging.Logger;
34
import java.util.zip.GZIPInputStream;
35

36
/**
37
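 * Iterates over the records of the input files or the input stream and passes each of them
 * to a BibliographicInputProcessor.
 *
 * usage (shown for the Validator command-line tool):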
38
 * java -cp target/qa-catalogue-0.1-SNAPSHOT-jar-with-dependencies.jar de.gwdg.metadataqa.marc.cli.Validator [MARC21 file]
39
 * @author Péter Király <peter.kiraly at gwdg.de>
40
 */
41
public class RecordIterator {
42

43
  private static final Logger logger = Logger.getLogger(RecordIterator.class.getCanonicalName());
1✔
44
  private final BibliographicInputProcessor processor;
45
  private int recordNumber = 0;
1✔
46
  private String lastKnownId = "";
1✔
47
  private CommonParameters parameters;
48
  private String replacementInControlFields;
49
  private MarcVersion marcVersion;
50
  private MarcLeader.Type defaultRecordType;
51
  private DecimalFormat decimalFormat;
52
  // this schema attribute could be merged with the UNIMARC one
53
  private PicaSchemaManager picaSchema;
54
  private UnimarcSchemaManager unimarcSchema;
55
  private String status = "waits";
1✔
56
  private boolean processWithErrors = false;
1✔
57
  private long start;
58

59
  /**
   * Creates an iterator bound to the given processor and sets the status to "initialized".
   *
   * @param processor the processor that receives the lifecycle callbacks and the records
   */
  public RecordIterator(BibliographicInputProcessor processor) {
    this.status = "initialized";
    this.processor = processor;
  }
1✔
63

64
  public void start() {
65
    start = System.currentTimeMillis();
1✔
66
    processor.beforeIteration();
1✔
67
    parameters = processor.getParameters();
1✔
68

69
    marcVersion = parameters.getMarcVersion();
1✔
70
    defaultRecordType = parameters.getDefaultRecordType();
1✔
71
    replacementInControlFields = parameters.getReplacementInControlFields();
1✔
72
    decimalFormat = new DecimalFormat();
1✔
73

74
    setupSchema();
1✔
75

76
    if (processor.getParameters().doLog())
1✔
77
      logger.info("marcVersion: " + marcVersion.getCode() + ", " + marcVersion.getLabel());
1✔
78

79
    if (parameters.getDataSource().equals(DataSource.FILE)) {
1✔
80
      String[] inputFileNames = processor.getParameters().getArgs();
1✔
81
      for (String inputFileName : inputFileNames) {
1✔
82
        if (!processor.readyToProcess())
1✔
UNCOV
83
          break;
×
84
        processFile(inputFileName);
1✔
85
      }
86
    } else if (parameters.getDataSource().equals(DataSource.STREAM)) {
1✔
87
      try {
UNCOV
88
        MarcReader reader = getMarcStreamReader(processor.getParameters());
×
UNCOV
89
        processContent(reader, "stream");
×
UNCOV
90
      } catch (Exception e) {
×
91
        logger.severe(e.getLocalizedMessage());
×
92
      }
×
93
    }
94

95
    long duration = System.currentTimeMillis() - start;
1✔
96
    processor.afterIteration(recordNumber, duration);
1✔
97

98
    if (parameters.doLog())
1✔
99
      logger.log(Level.INFO, "Bye! It took: {0}", Utils.formatDuration(duration));
1✔
100

101
    status = "done";
1✔
102
  }
1✔
103

104
  private void processFile(String inputFileName) {
105
    var path = Paths.get(inputFileName);
1✔
106
    String fileName = path.getFileName().toString();
1✔
107

108
    if (processor.getParameters().doLog())
1✔
109
      logger.log(Level.INFO, "processing: {0}", fileName);
1✔
110

111
    try {
112
      processor.fileOpened(path);
1✔
113
      MarcReader reader = getMarcFileReader(processor.getParameters(), path);
1✔
114
      processContent(reader, fileName);
1✔
115
      if (processor.getParameters().doLog())
1✔
116
        logger.log(Level.INFO, "Finished processing file. Processed {0} records.", new Object[]{decimalFormat.format(recordNumber)});
1✔
117

UNCOV
118
    } catch (SolrServerException ex) {
×
119
      if (processor.getParameters().doLog())
×
120
        logger.severe(ex.toString());
×
121
      System.exit(1);
×
122
    } catch (Exception ex) {
×
NEW
123
      if (!processor.getParameters().doLog()) {
×
NEW
124
        logger.log(Level.SEVERE, "error in processFile()", ex);
×
125
        // System.exit(1);
126
      }
127

NEW
128
      logger.severe("Other exception: " + ex);
×
NEW
129
      ex.printStackTrace();
×
130

NEW
131
      for (StackTraceElement element : ex.getStackTrace()) {
×
132
        // logger.severe(element.toString());
NEW
133
        System.err.println(element.toString());
×
134
      }
NEW
135
      Throwable exa = ex;
×
NEW
136
      while (exa.getCause() != null) {
×
NEW
137
        logger.severe("cause");
×
NEW
138
        exa = exa.getCause();
×
NEW
139
        for (StackTraceElement element : exa.getStackTrace()) {
×
UNCOV
140
          System.err.println(element.toString());
×
141
        }
142
      }
143
      // logger.log(Level.SEVERE, "start2", ex);
144
      // System.exit(1);
145
    }
1✔
146
  }
1✔
147

148
  private void processContent(MarcReader reader, String fileName) {
149
    try {
150
      while (reader.hasNext()) {
1✔
151
        if (!processor.readyToProcess()
1✔
152
          || isOverLimit(processor.getParameters().getLimit(), recordNumber)) {
1✔
NEW
153
          break;
×
154
        }
155

156
        try {
157
          IteratorResponse iteratorResponse = getNextMarc4jRecord(recordNumber, lastKnownId, reader);
1✔
158
          recordNumber++;
1✔
159
          processIteratorResponse(iteratorResponse, fileName);
1✔
NEW
160
        } catch (MarcException ex) {
×
NEW
161
          logger.log(Level.SEVERE, "catched MarcException", ex);
×
NEW
162
        } catch (Exception ex) {
×
NEW
163
          logger.log(Level.SEVERE, "catched Exception", ex);
×
164
        }
1✔
165
      }
NEW
166
    } catch (MarcException ex) {
×
NEW
167
      String msg = String.format("Error during processing the file content." +
×
168
          " File: %s, last known record number: %s, last known record identifier: %s",
NEW
169
        fileName, recordNumber, lastKnownId);
×
NEW
170
      logger.log(Level.SEVERE, msg, ex);
×
171
    }
1✔
172
  }
1✔
173

174
  private void processIteratorResponse(IteratorResponse iteratorResponse, String fileName) {
175
    Record marc4jRecord = iteratorResponse.getMarc4jRecord();
1✔
176
    if (marc4jRecord == null) {
1✔
NEW
177
      return;
×
178
    }
179

180
    if (isUnderOffset(processor.getParameters().getOffset(), recordNumber)) {
1✔
NEW
181
      return;
×
182
    }
183

184
    if (marc4jRecord.getControlNumber() == null && !processWithErrors) {
1✔
NEW
185
      logger.log(Level.SEVERE, "No record number at {0}, last known ID: {1}", new Object[]{recordNumber, lastKnownId});
×
NEW
186
      if (marc4jRecord.getLeader() != null) {
×
NEW
187
        logger.severe(marc4jRecord::toString);
×
188
      }
NEW
189
      return;
×
190
    } else {
191
      lastKnownId = marc4jRecord.getControlNumber();
1✔
192
    }
193

194
    if (skipRecord(iteratorResponse.getMarc4jRecord())) {
1✔
NEW
195
      return;
×
196
    }
197

198
    try {
199
      if (processWithErrors && marc4jRecord != null && marc4jRecord.getControlNumberField() == null)
1✔
NEW
200
          marc4jRecord.addVariableField(new ControlFieldImpl("001", "qac" + recordNumber));
×
201

202
      processor.processRecord(marc4jRecord, recordNumber);
1✔
203

204
      // Transform the marc4j record to a bibliographic record
205
      BibliographicRecord bibliographicRecord = iteratorResponse.hasBlockingError()
1✔
206
        ? null
1✔
207
        : transformMarcRecord(marc4jRecord);
1✔
208

209
      try {
210
        if (processWithErrors) {
1✔
211
          processor.processRecord(bibliographicRecord, recordNumber, iteratorResponse.getErrors());
1✔
212
        } else if (bibliographicRecord != null) {
1✔
213
          processor.processRecord(bibliographicRecord, recordNumber);
1✔
214
        }
NEW
215
      } catch(Exception e) {
×
NEW
216
        logger.log(Level.SEVERE, "Problem occured at processor.processRecord()", e);
×
UNCOV
217
        e.printStackTrace();
×
218
      }
1✔
219

220
      if (recordNumber % 100000 == 0 && processor.getParameters().doLog()) {
1✔
NEW
221
        logger.log(Level.INFO, "{0}/{1} ({2})", new Object[]{
×
222
          fileName,
NEW
223
          decimalFormat.format(recordNumber),
×
NEW
224
          (bibliographicRecord != null ? bibliographicRecord.getId() : "unknown")});
×
225
      }
226

NEW
227
    } catch (IllegalArgumentException e) {
×
NEW
228
      extracted(recordNumber, marc4jRecord, e, "Error (illegal argument) with record '%s'. %s");
×
NEW
229
    } catch (Exception e) {
×
NEW
230
      e.printStackTrace();
×
NEW
231
      extracted(recordNumber, marc4jRecord, e, "Error (general) with record '%s'. %s");
×
232
    }
1✔
233
  }
1✔
234

235
  private BibliographicRecord transformMarcRecord(Record marc4jRecord) {
236
    if (parameters.getSchemaType().equals(SchemaType.MARC21)) {
1✔
237
      return MarcFactory.createFromMarc4j(marc4jRecord, defaultRecordType, marcVersion, replacementInControlFields);
1✔
238
    } else if (parameters.getSchemaType().equals(SchemaType.PICA)) {
1✔
239
      return MarcFactory.createPicaFromMarc4j(marc4jRecord, picaSchema);
1✔
240
    } else {
241
      return MarcFactory.createUnimarcFromMarc4j(marc4jRecord, defaultRecordType, unimarcSchema);
1✔
242
    }
243
  }
244

245
  private MarcReader getMarcFileReader(CommonParameters parameters, Path path) throws Exception {
246
    MarcReader marcReader;
247
    if (path.toString().endsWith(".gz")) {
1✔
248
      marcReader = QAMarcReaderFactory.getStreamReader(
1✔
249
        parameters.getMarcFormat(),
1✔
250
        new GZIPInputStream(new FileInputStream(path.toFile())),
1✔
251
        parameters);
252
    } else {
253
      marcReader = QAMarcReaderFactory.getFileReader(parameters.getMarcFormat(), path.toString(), parameters);
1✔
254
    }
255
    if (parameters.getAlephseqLineType() != null && marcReader instanceof AlephseqMarcReader) {
1✔
UNCOV
256
      ((AlephseqMarcReader) marcReader).setLineType(parameters.getAlephseqLineType());
×
257
    }
258
    return marcReader;
1✔
259
  }
260

261
  private MarcReader getMarcStreamReader(CommonParameters parameters) {
UNCOV
262
    return QAMarcReaderFactory.getStreamReader(parameters.getMarcFormat(), parameters.getStream(), parameters);
×
263
  }
264

265
  private IteratorResponse getNextMarc4jRecord(int i, String lastKnownId, MarcReader reader) {
266
    IteratorResponse response = new IteratorResponse();
1✔
267
    try {
268
      response.setMarc4jRecord(reader.next());
1✔
269
      if (reader instanceof ErrorAwareReader) {
1✔
270
        ErrorAwareReader errorAwareReader = (ErrorAwareReader) reader;
1✔
271
        response.setErrors(errorAwareReader.getErrors());
1✔
272
        response.hasBlockingError(errorAwareReader.hasBlockingError());
1✔
273
      }
UNCOV
274
    } catch (MarcException | NegativeArraySizeException | NumberFormatException e) {
×
UNCOV
275
      response.addError(lastKnownId, e.getLocalizedMessage());
×
UNCOV
276
      String msg = String.format("MARC record parsing problem at record #%d (last known ID: %s): %s",
×
UNCOV
277
              (i + 1), lastKnownId, e.getLocalizedMessage());
×
UNCOV
278
      logger.severe(msg);
×
UNCOV
279
    } catch (Exception e) {
×
UNCOV
280
      response.addError(lastKnownId, e.getLocalizedMessage());
×
NEW
281
      logger.log(Level.SEVERE, "error in getNextMarc4jRecord()", e);
×
282
    }
1✔
283
    return response;
1✔
284
  }
285

286
  private boolean skipRecord(Record marc4jRecord) {
287
    return processor.getParameters().hasId()
1✔
288
      && !marc4jRecord.getControlNumber().trim().equals(processor.getParameters().getId());
1✔
289
  }
290

291
  private void extracted(int i, Record marc4jRecord, Exception e, String message) {
UNCOV
292
    if (marc4jRecord.getControlNumber() == null)
×
NEW
293
      logger.log(Level.SEVERE, "No record number at {0}", i);
×
UNCOV
294
    if (processor.getParameters().doLog())
×
NEW
295
      logger.log(Level.SEVERE, String.format(message, marc4jRecord.getControlNumber(), e.getMessage()));
×
NEW
296
    logger.log(Level.SEVERE, "error in extracted()", e);
×
UNCOV
297
  }
×
298

299
  private static boolean isOverLimit(int limit, int i) {
300
    return limit > -1 && i > limit;
1✔
301
  }
302

303
  private static boolean isUnderOffset(int offset, int i) {
304
    return offset > -1 && i < offset;
1✔
305
  }
306

307
  private static void printHelp(Options opions) {
UNCOV
308
    HelpFormatter formatter = new HelpFormatter();
×
UNCOV
309
    formatter.printHelp("java -cp qa-catalogue.jar de.gwdg.metadataqa.marc.cli.Validator [options] [file]",
×
310
      opions);
311
  }
×
312

313
  private void setupSchema() {
314
    // This should probably be moved to a factory of some sort if the schema managers, field definitions and field
315
    // implementations get refactored in a way that they inherit from common interfaces.
316
    // That's a bit of a long shot though, so for now we'll just keep it here.
317
    if (parameters.isPica()) {
1✔
318
      picaSchema = PicaSchemaReader.createSchemaManager(parameters.getPicaSchemaFile());
1✔
319
    } else if (parameters.isUnimarc()) {
1✔
320
      UnimarcSchemaReader unimarcSchemaReader = new UnimarcSchemaReader();
1✔
321
      String schemaFilePath = parameters.getPicaSchemaFile();
1✔
322
      if (schemaFilePath == null) {
1✔
323
        schemaFilePath = "src/main/resources/unimarc/avram-unimarc.json";
1✔
324
      }
325
      unimarcSchema = unimarcSchemaReader.createSchema(schemaFilePath);
1✔
326
    }
327
  }
1✔
328

329
  public String getStatus() {
330
    return status;
1✔
331
  }
332

333
  public void setProcessWithErrors(boolean processWithErrors) {
334
    this.processWithErrors = processWithErrors;
1✔
335
  }
1✔
336

337
  public long getStart() {
UNCOV
338
    return start;
×
339
  }
340
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc