• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pkiraly / metadata-qa-marc / #1527

22 Aug 2025 02:21PM UTC coverage: 90.345%. Remained the same
#1527

push

pkiraly
Improve timeline handling

5191 of 6416 new or added lines in 219 files covered. (80.91%)

886 existing lines in 78 files now uncovered.

36717 of 40641 relevant lines covered (90.34%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

43.06
/src/main/java/de/gwdg/metadataqa/marc/cli/MarcToSolr.java
1
package de.gwdg.metadataqa.marc.cli;
2

3
import de.gwdg.metadataqa.marc.MarcSubfield;
4
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
5
import de.gwdg.metadataqa.marc.cli.parameters.MarcToSolrParameters;
6
import de.gwdg.metadataqa.marc.cli.processor.BibliographicInputProcessor;
7
import de.gwdg.metadataqa.marc.cli.utils.RecordIterator;
8
import de.gwdg.metadataqa.marc.dao.DataField;
9
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
10
import de.gwdg.metadataqa.marc.datastore.MarcSolrClient;
11
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
12
import de.gwdg.metadataqa.marc.definition.general.indexer.FieldIndexer;
13
import de.gwdg.metadataqa.marc.model.validation.ValidationError;
14
import de.gwdg.metadataqa.marc.utils.Counter;
15
import de.gwdg.metadataqa.marc.utils.keygenerator.DataFieldKeyGenerator;
16
import de.gwdg.metadataqa.marc.utils.pica.PicaFieldDefinition;
17
import de.gwdg.metadataqa.marc.utils.pica.PicaGroupIndexer;
18
import de.gwdg.metadataqa.marc.utils.pica.path.PicaPath;
19
import org.apache.commons.cli.HelpFormatter;
20
import org.apache.commons.cli.Options;
21
import org.apache.commons.cli.ParseException;
22
import org.apache.commons.lang3.StringUtils;
23
import org.apache.solr.common.SolrDocument;
24
import org.apache.solr.common.SolrInputDocument;
25
import org.marc4j.marc.Record;
26

27
import java.io.IOException;
28
import java.io.Serializable;
29
import java.nio.file.Path;
30
import java.text.DecimalFormat;
31
import java.util.ArrayList;
32
import java.util.Collections;
33
import java.util.HashMap;
34
import java.util.List;
35
import java.util.Map;
36
import java.util.logging.Logger;
37

38
/**
39
 * usage:
40
 * java -cp target/qa-catalogue-0.1-SNAPSHOT-jar-with-dependencies.jar de.gwdg.metadataqa.marc.cli.MarcToSolr http://localhost:8983/solr/tardit 0001.0000000.formatted.json
41
 *
42
 * @author Péter Király <peter.kiraly at gwdg.de>
43
 */
44
public class MarcToSolr extends QACli<MarcToSolrParameters> implements BibliographicInputProcessor, Serializable {
45

46
  private static final Logger logger = Logger.getLogger(
1✔
47
    MarcToSolr.class.getCanonicalName()
1✔
48
  );
49
  private Options options;
50
  private MarcSolrClient client;
51
  private MarcSolrClient validationClient;
52
  private Path currentFile;
53
  private boolean readyToProcess;
54
  private final DecimalFormat decimalFormat = new DecimalFormat();
1✔
55
  private FieldIndexer groupIndexer;
56
  private final Map<String, String> escapedTagCache = new HashMap<>();
1✔
57

58
  /**
   * Creates the indexer from raw command line arguments.
   *
   * @param args The command line arguments, handed over to {@link MarcToSolrParameters}
   * @throws ParseException Propagated from the construction of the parameters object
   */
  public MarcToSolr(String[] args) throws ParseException {
    this.parameters = new MarcToSolrParameters(args);
    this.initialize();
  }
×
62

63
  /**
   * Creates the indexer from an already built parameters object.
   *
   * @param parameters The parameters controlling the indexing
   */
  public MarcToSolr(MarcToSolrParameters parameters) {
    this.parameters = parameters;
    this.initialize();
  }
1✔
67

68
  private void initialize() {
69
    options = parameters.getOptions();
1✔
70

71
    client = parameters.isUseEmbedded()
1✔
72
      ? new MarcSolrClient(parameters.getMainClient())
1✔
73
      : new MarcSolrClient(parameters.getSolrUrl());
1✔
74
    client.setTrimId(parameters.getTrimId());
1✔
75
    client.indexWithTokenizedField(parameters.isIndexWithTokenizedField());
1✔
76

77
    if (parameters.getFieldPrefix() != null) {
1✔
78
      client.setFieldPrefix(parameters.getFieldPrefix());
1✔
79
    }
80

81
    if (parameters.getSolrForScoresUrl() != null) {
1✔
82
      validationClient = parameters.isUseEmbedded()
1✔
83
        ? new MarcSolrClient(parameters.getValidationClient())
1✔
84
        : new MarcSolrClient(parameters.getSolrForScoresUrl());
1✔
85
      validationClient.setTrimId(parameters.getTrimId());
1✔
86

87
      if (parameters.getFieldPrefix() != null) {
1✔
88
        validationClient.setFieldPrefix(parameters.getFieldPrefix());
1✔
89
      }
90
    }
91

92
    readyToProcess = true;
1✔
93
    initializeGroups(parameters.getGroupBy(), parameters.isPica());
1✔
94
    if (doGroups()) {
1✔
UNCOV
95
      groupIndexer = new PicaGroupIndexer().setPicaPath((PicaPath) groupBy);
×
96
    }
97
  }
1✔
98

99
  public static void main(String[] args) {
100
    try {
UNCOV
101
      MarcToSolr processor = new MarcToSolr(args);
×
UNCOV
102
      if (StringUtils.isBlank(((MarcToSolrParameters) processor.getParameters()).getSolrUrl())) {
×
NEW
103
        logger.severe("Please provide a Solr URL and file name!");
×
104
        System.exit(1);
×
105
      }
106

UNCOV
107
      RecordIterator iterator = new RecordIterator(processor);
×
NEW
108
      iterator.setProcessWithErrors(processor.getParameters().getProcessRecordsWithoutId());
×
UNCOV
109
      iterator.start();
×
NEW
110
      System.exit(0);
×
111
    } catch(Exception e) {
×
NEW
112
      logger.severe(() -> "ERROR. " + e.getLocalizedMessage());
×
113
      System.exit(1);
×
114
    }
×
115
  }
×
116

117
  @Override
118
  public CommonParameters getParameters() {
119
    return parameters;
1✔
120
  }
121

122
  @Override
123
  public void processRecord(Record marc4jRecord, int recordNumber) throws IOException {
124
    // do nothing
125
  }
1✔
126

127
  @Override
128
  public void processRecord(BibliographicRecord bibliographicRecord, int recordNumber, List<ValidationError> errors) throws IOException {
NEW
129
    processRecord(bibliographicRecord, recordNumber);
×
UNCOV
130
  }
×
131

132
  @Override
133
  public void processRecord(BibliographicRecord bibliographicRecord, int recordNumber) throws IOException {
134
    if (parameters.getRecordIgnorator().isIgnorable(bibliographicRecord))
1✔
UNCOV
135
      return;
×
136

137
    if (bibliographicRecord.getSchemaType().equals(SchemaType.PICA) && doGroups()) {
1✔
NEW
138
      for (DataField groupField : bibliographicRecord.getDatafieldsByTag(((PicaPath) groupBy).getTag())) {
×
139
        groupField.addFieldIndexer(groupIndexer);
×
NEW
140
      }
×
141
    }
142

143
    Map<String, List<String>> keyValuePairs = bibliographicRecord.getKeyValuePairs(
1✔
144
      parameters.getSolrFieldType(), true, parameters.getMarcVersion()
1✔
145
    );
146

147
    // Add the record itself as a field to the index
148
    keyValuePairs.put("record_sni", Collections.singletonList(bibliographicRecord.asJson()));
1✔
149

150
    // logger.info(bibliographicRecord.getId());
151
    SolrInputDocument solrDocument = client.createSolrDoc(bibliographicRecord.getId(), keyValuePairs);
1✔
152
    if (validationClient != null) {
1✔
153
      indexValidationResults(bibliographicRecord, solrDocument);
1✔
154
    }
155

156
    if (parameters.isIndexFieldCounts() || parameters.isIndexSubfieldCounts()) {
1✔
157
      indexFieldCounts(bibliographicRecord, solrDocument);
×
158
    }
159

160
    try {
161
      client.index(solrDocument);
1✔
NEW
162
    } catch (Exception e) {
×
NEW
163
      logger.severe(() -> "ERROR while index." + e.getLocalizedMessage());
×
164
    }
1✔
165

166
    if (recordNumber % parameters.getCommitAt() != 0) {
1✔
167
      return;
1✔
168
    }
169

NEW
170
    if (parameters.isDoCommit()) {
×
NEW
171
      logger.info("do commit @" + recordNumber);
×
NEW
172
      client.commit();
×
NEW
173
      long indexedRecordCount = client.getCount();
×
NEW
174
      if (recordNumber != indexedRecordCount) {
×
NEW
175
        logger.severe(String.format("recordNumber: %d != indexedRecordCount: %d", recordNumber, indexedRecordCount));
×
176
      }
NEW
177
      logger.info("/do commit @" + recordNumber);
×
178
    }
179

NEW
180
    String logMessage = String.format(
×
181
      "%s/%s (%s)",
NEW
182
      currentFile.getFileName().toString(),
×
NEW
183
      decimalFormat.format(recordNumber),
×
NEW
184
      bibliographicRecord.getId()
×
185
    );
NEW
186
    logger.info(logMessage);
×
UNCOV
187
  }
×
188

189
  private void indexValidationResults(BibliographicRecord bibliographicRecord, SolrInputDocument document) {
190
    SolrDocument validationValues = validationClient.get(bibliographicRecord.getId());
1✔
191
    if (validationValues == null || validationValues.isEmpty()) {
1✔
NEW
192
      return;
×
193
    }
194

195
    for (String field : validationValues.getFieldNames()) {
1✔
196
      document.addField(field, validationValues.getFieldValues(field));
1✔
197
    }
1✔
198
  }
1✔
199

200
  /**
201
   * Index field and subfield counts. The solr field will look like <tag>_count_i and
202
   * <tag><subfield code>_count_i, the value will be the number of times this element is
203
   * available in the record.
204
   *
205
   * @param bibliographicRecord The bibliographic record
206
   * @param document The Solr document
207
   */
208
  private void indexFieldCounts(BibliographicRecord bibliographicRecord,
209
                                SolrInputDocument document) {
NEW
210
    Counter<String> counter = new Counter<>();
×
UNCOV
211
    boolean isPica = bibliographicRecord.getSchemaType().equals(SchemaType.PICA);
×
NEW
212
    Map<String, List<Integer>> subfields = new HashMap<>();
×
UNCOV
213
    for (DataField field : bibliographicRecord.getDatafields()) {
×
214
      String tag;
UNCOV
215
      if (field.getDefinition() != null) {
×
216
        tag = isPica
×
217
          ? ((PicaFieldDefinition)field.getDefinition()).getId()
×
218
          : field.getDefinition().getTag();
×
219
      } else {
220
        tag = field.getTag();
×
221
      }
NEW
222
      String safeTag = escape(tag);
×
NEW
223
      if (parameters.isIndexFieldCounts())
×
NEW
224
        counter.count(safeTag);
×
225

NEW
226
      if (parameters.isIndexSubfieldCounts()) {
×
NEW
227
        Counter<String> subfieldCounter = new Counter<>();
×
NEW
228
        for (MarcSubfield subfield : field.getSubfields()) {
×
NEW
229
          String safeSubfieldCode = DataFieldKeyGenerator.escape(subfield.getCode());
×
NEW
230
          subfieldCounter.count(safeTag + safeSubfieldCode);
×
NEW
231
        }
×
NEW
232
        for (Map.Entry<String, Integer> entry : subfieldCounter.entrySet()) {
×
NEW
233
          if (!subfields.containsKey(entry.getKey()))
×
NEW
234
            subfields.put(entry.getKey(), new ArrayList<>());
×
NEW
235
          subfields.get(entry.getKey()).add(entry.getValue());
×
NEW
236
        }
×
237
      }
NEW
238
    }
×
NEW
239
    for (Map.Entry<String, Integer> entry : counter.entrySet()) {
×
NEW
240
      document.addField(String.format(
×
241
        "%s%s_count_i",
NEW
242
        parameters.getFieldPrefix(), entry.getKey()), entry.getValue());
×
NEW
243
    }
×
244

NEW
245
    if (parameters.isIndexSubfieldCounts()) {
×
NEW
246
      for (Map.Entry<String, List<Integer>> entry : subfields.entrySet()) {
×
NEW
247
        document.addField(String.format(
×
248
          "%s%s_count_is",
NEW
249
          parameters.getFieldPrefix(), entry.getKey()), entry.getValue());
×
NEW
250
      }
×
251
    }
UNCOV
252
  }
×
253

254
  private String escape(String tag) {
NEW
255
    escapedTagCache.putIfAbsent(tag, DataFieldKeyGenerator.escape(tag));
×
256
    return escapedTagCache.get(tag);
×
257
  }
258

259
  @Override
260
  public void beforeIteration() {
261
    logger.info(() -> parameters.formatParameters());
1✔
262
    parameters.setMainClient(null);
1✔
263
    parameters.setValidationClient(null);
1✔
264
  }
1✔
265

266
  @Override
267
  public void fileOpened(Path path) {
268
    currentFile = path;
1✔
269
  }
1✔
270

271
  @Override
272
  public void fileProcessed() {
273
    // Do nothing
UNCOV
274
  }
×
275

276
  @Override
277
  public void afterIteration(int numberOfprocessedRecords, long duration) {
278
    client.commit();
1✔
279
    logger.info(parameters.toString());
1✔
280
    saveParameters(
1✔
281
      "marctosolr.params.json",
282
      parameters,
283
      Map.of(
1✔
284
        "numberOfprocessedRecords", numberOfprocessedRecords,
1✔
285
        "duration", duration
1✔
286
      )
287
    );
288
  }
1✔
289

290
  @Override
291
  public void printHelp(Options options) {
UNCOV
292
    HelpFormatter formatter = new HelpFormatter();
×
293
    String message = String.format("java -cp qa-catalogue.jar %s [options] [file]", this.getClass().getCanonicalName());
×
294
    formatter.printHelp(message, options);
×
295
  }
×
296

297
  @Override
298
  public boolean readyToProcess() {
299
    return readyToProcess;
1✔
300
  }
301
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc