• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pkiraly / metadata-qa-marc / #1527

22 Aug 2025 02:21PM UTC coverage: 90.345%. Remained the same
#1527

push

pkiraly
Improve timeline handling

5191 of 6416 new or added lines in 219 files covered. (80.91%)

886 existing lines in 78 files now uncovered.

36717 of 40641 relevant lines covered (90.34%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.59
/src/main/java/de/gwdg/metadataqa/marc/cli/utils/TranslationModel.java
1
package de.gwdg.metadataqa.marc.cli.utils;
2

3
import de.gwdg.metadataqa.api.model.XmlFieldInstance;
4
import de.gwdg.metadataqa.api.rule.RuleCheckerOutput;
5
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputStatus;
6
import de.gwdg.metadataqa.marc.EncodedValue;
7
import de.gwdg.metadataqa.marc.MarcSubfield;
8
import de.gwdg.metadataqa.marc.cli.utils.placename.PlaceName;
9
import de.gwdg.metadataqa.marc.cli.utils.placename.PlaceNameNormaliser;
10
import de.gwdg.metadataqa.marc.cli.utils.translation.ContributorNormaliser;
11
import de.gwdg.metadataqa.marc.cli.utils.translation.PublicationYearNormaliser;
12
import de.gwdg.metadataqa.marc.dao.DataField;
13
import de.gwdg.metadataqa.marc.definition.MarcVersion;
14
import de.gwdg.metadataqa.marc.definition.general.codelist.LanguageCodes;
15
import org.apache.commons.lang3.StringUtils;
16

17
import java.text.Normalizer;
18
import java.util.ArrayList;
19
import java.util.Arrays;
20
import java.util.HashMap;
21
import java.util.List;
22
import java.util.Map;
23
import java.util.logging.Logger;
24
import java.util.regex.Pattern;
25
import java.util.stream.Collectors;
26

27
/**
28
 * Evaluates the rule-checking results of a bibliographic record and decides whether the record describes a translation.
29
 */
30
public class TranslationModel {
31
  private static final Logger logger = Logger.getLogger(TranslationModel.class.getCanonicalName());
1✔
32

33
  private static final Pattern yearPattern = Pattern.compile("^\\d{4}$");
1✔
34

35
  private static Map<String, String> languageCodes = TranslationModel.toMap(List.of(
1✔
36
    "jap", "Japanese", "fra", "French", "fr", "French", "hu", "Hungarian", "esp", "Spanish",
37
    "srb", "Serbian"
38
  ));
39

40

41
  private final BibSelector selector;
42
  private final Map<String, RuleCheckerOutput> resultMap;
43
  private final PlaceNameNormaliser placeNameNormaliser;
44
  private final PublicationYearNormaliser yearNormaliser;
45
  private final MarcVersion marcVersion;
46
  private final ContributorNormaliser contributorNormaliser;
47

48
  private boolean translation;
49
  private boolean translator;
50
  private boolean sourceLanguage;
51
  private boolean targetLanguage;
52
  private boolean originalTitle;
53
  private boolean originalPublication;
54

55
  /**
   * Creates the model and immediately evaluates the rule results, so the evidence flags are
   * available as soon as construction finishes.
   *
   * @param resultMap             The results of the measurement, keyed by rule identifier
   * @param selector              The data object that contains selected data elements of the bib record
   * @param placeNameNormaliser   Normaliser applied to publication places; may be null, in which
   *                              case place names are returned without normalisation
   * @param yearNormaliser        Normaliser applied to publication years; may be null, in which
   *                              case years are returned without normalisation
   * @param contributorNormaliser Processor invoked on 245$c when the record has no 100$a author
   * @param marcVersion           The MARC flavour of the record
   */
  public TranslationModel(Map<String, RuleCheckerOutput> resultMap,
                          BibSelector selector,
                          PlaceNameNormaliser placeNameNormaliser,
                          PublicationYearNormaliser yearNormaliser,
                          ContributorNormaliser contributorNormaliser,
                          MarcVersion marcVersion) {
    // record access
    this.selector = selector;
    this.marcVersion = marcVersion;

    // measurement outcome
    this.resultMap = resultMap;

    // value normalisers
    this.placeNameNormaliser = placeNameNormaliser;
    this.yearNormaliser = yearNormaliser;
    this.contributorNormaliser = contributorNormaliser;

    // must run last: it reads resultMap
    evaluate();
  }
1✔
75

76
  public static List<String> header() {
77
    return List.of(
1✔
78
      "translator", "sourceLanguage", "targetLanguage",
79
      "originalTitle", "originalPublication", "translation"
80
    );
81
  }
82

83
  public List<Integer> values() {
84
    return List.of(
1✔
85
      translator, sourceLanguage, targetLanguage,
1✔
86
      originalTitle, originalPublication, translation
1✔
87
    ).stream().map(s -> s.compareTo(false)).collect(Collectors.toList());
1✔
88
  }
89

90
  public Map<String, Object> extract() {
91
    Map<String, Object> extracted = new HashMap<>();
1✔
92
    // sourceLanguage
93
    // System.err.println(extract("041$h"));
94
    extracted.put("sourceLanguage", extract("041$h"));
1✔
95
    // targetLanguage
96
    // System.err.println(extract("041$a"));
97
    extracted.put("targetLanguage", extract("041$a"));
1✔
98
    // Language of a work
99
    // System.err.println(extract("240$l"));
100
    // System.err.println(extract("260$a"));
101
    extracted.put("author", extract("100$a"));
1✔
102
    extracted.put("publicationPlace", extract("260$a"));
1✔
103
    extracted.put("publicationYear", extract("260$c"));
1✔
104
    return extracted;
1✔
105
  }
106

107
  private List<? extends Object> extract(String path) {
108
    List<XmlFieldInstance> instances = new ArrayList<>();
1✔
109
    if (path.equals("100$a") && marcVersion == MarcVersion.HUNMARC) {
1✔
NEW
110
      instances = getAuthorsFromHunmarc();
×
111
    } else {
112
      instances = selector.get(path);
1✔
113
    }
114
    if (path.equals("260$a")) {
1✔
115
      instances.addAll(selector.get("264$a"));
1✔
116
    } else if (path.equals("260$c")) {
1✔
117
      instances.addAll(selector.get("264$c"));
1✔
118
    } else if (path.equals("100$a") && instances.size() == 0) {
1✔
NEW
119
      contributorNormaliser.process(selector.get("245$c"));
×
NEW
120
      instances.addAll(selector.get("245$c"));
×
121
    }
122
    List<String> extracted = new ArrayList<>();
1✔
123
    if (path == "041$h" || path == "041$a") {
1✔
124
      for (XmlFieldInstance instance : instances) {
1✔
125
        String value = instance.getValue();
1✔
126
        if (value != null) {
1✔
127
          value = value.trim();
1✔
128
          if (value.contains(", "))
1✔
NEW
129
            extracted.addAll(Arrays.asList(value.split(", "))
×
NEW
130
              .stream()
×
NEW
131
              .map(s -> resultLanguageCode(s.trim().toLowerCase()))
×
NEW
132
              .collect(Collectors.toList()));
×
133
          else if (!value.contains(" ")) {
1✔
134
            if (value.length() > 3) {
1✔
135
              for (int i = 0; i < value.length(); i += 3) {
1✔
136
                int end = i + 3;
1✔
137
                String abr = (value.length() >= end)
1✔
138
                  ? value.substring(i, end).toLowerCase()
1✔
139
                  : value.substring(i).toLowerCase();
1✔
140
                extracted.add(resultLanguageCode(abr));
1✔
141
              }
142
            } else {
143
              extracted.add(resultLanguageCode(value.toLowerCase()));
1✔
144
            }
145
          } else {
NEW
146
            logger.warning(String.format("%s - Unhandled language: '%s'", path, value));
×
147
          }
148
        }
149
      }
1✔
150
      extracted = extracted.stream()
1✔
151
        .distinct()
1✔
152
        .filter(s -> !s.equals("und"))
1✔
153
        .collect(Collectors.toList());
1✔
154
    } else {
155
      extracted = instances.stream()
1✔
156
        .map(XmlFieldInstance::getValue)
1✔
157
        .map(s -> Normalizer.normalize(s, Normalizer.Form.NFKC))
1✔
158
        .collect(Collectors.toList());
1✔
159
      if (path.equals("240$l")) {
1✔
NEW
160
        extracted = extracted.stream()
×
NEW
161
          .map(s -> s.replaceAll("\\.$", ""))
×
NEW
162
          .collect(Collectors.toList());
×
163
      } else if (path.equals("260$a") && placeNameNormaliser != null) {
1✔
164
        List<PlaceName> placeNames = processPlaceName(extracted);
1✔
165
        if (placeNames.isEmpty()) {
1✔
166
          return new ArrayList<>();
1✔
167
        } else {
168
          for (PlaceName p : placeNames) {
1✔
169
            if (p == null || p.getCity() == null) {
1✔
NEW
170
              logger.warning(String.format("%s - null in place name: '%s'", path, StringUtils.join(extracted, "' -- '")));
×
171
            }
172
          }
1✔
173
          return placeNames.stream()
1✔
174
            .filter(s -> s.getCity() != null)
1✔
175
            .map(PlaceName::getCity)
1✔
176
            .collect(Collectors.toList());
1✔
177
        }
178
      } else if (path.equals("260$c") && yearNormaliser != null) {
1✔
179
        return yearNormaliser.processYear(extracted);
1✔
180
      }
181
    }
182
    return extracted;
1✔
183
  }
184

185
  private List<XmlFieldInstance> getAuthorsFromHunmarc() {
186
    // System.err.println(selector.getClass());
NEW
187
    List<String> names = new ArrayList<>();
×
NEW
188
    MarcSpecSelector mSelector = (MarcSpecSelector) selector;
×
NEW
189
    for (Object f : mSelector.extract("100")) {
×
NEW
190
      StringBuilder name = new StringBuilder();
×
NEW
191
      DataField field = (DataField) f;
×
NEW
192
      List<MarcSubfield> a = field.getSubfield("a");
×
NEW
193
      if (a != null && !a.isEmpty()) {
×
NEW
194
        if (a.size() == 1) {
×
NEW
195
          name.append(a.get(0).getValue());
×
196
        } else {
NEW
197
          logger.warning("Multiple 100$a: " + a);
×
198
        }
199
      }
NEW
200
      List<MarcSubfield> j = field.getSubfield("j");
×
NEW
201
      if (j != null && !j.isEmpty()) {
×
NEW
202
        name.append(" ").append(j.get(0).getValue());
×
NEW
203
        if (j.size() > 1) {
×
NEW
204
          logger.warning("Multiple 100$j: " + j);
×
205
        }
206
      }
NEW
207
      if (name.length() > 0) {
×
208
        // logger.info(String.format("a: '%s', j: '%s' -> '%s'", a.toString(), j.toString(), name.toString()));
NEW
209
        names.add(name.toString());
×
210
      }
NEW
211
    }
×
NEW
212
    return new ArrayList<>();
×
213
  }
214

215
  private static String resultLanguageCode(String abbreviation) {
216
    EncodedValue code = LanguageCodes.getInstance().getCode(abbreviation);
1✔
217
    if (code != null) {
1✔
218
      return code.getLabel();
1✔
NEW
219
    } else if (languageCodes.containsKey(abbreviation)) {
×
NEW
220
      return languageCodes.get(abbreviation);
×
221
    } else {
NEW
222
      return abbreviation;
×
223
    }
224
  }
225

226
  private List<PlaceName> processPlaceName(List<String> input) {
227
    return placeNameNormaliser.normalise(input);
1✔
228
    /*
229
    if (!knownMultiwordCities.contains(result) && Pattern.matches("^.*[^a-zA-Zøäșóō].*$", result)) {
230
      System.err.println(String.format("'%s' -> '%s'",
231
        StringUtils.join(input, ", "), StringUtils.join(extracted, ", ")));
232
    }
233
     */
234
  }
235

236
  private void evaluate() {
237
    if (passed("041ind1"))
1✔
238
      translation = true;
1✔
239

240
    if (passed("041h")) {
1✔
241
      translation = true;
1✔
242
      sourceLanguage = true;
1✔
243
    }
244

245
    if (passed("041a")) {
1✔
246
      targetLanguage = true;
1✔
247
    }
248

249
    if (passed("245c")) {
1✔
250
      translation = true;
1✔
251
      translator = true;
1✔
252
    }
253

254
    if (passed("7004")) {
1✔
NEW
255
      translation = true;
×
NEW
256
      translator = true;
×
257
    }
258

259
    if (passed("700e")) {
1✔
NEW
260
      translation = true;
×
NEW
261
      translator = true;
×
262
    }
263

264
    if (passed("500a")) {
1✔
265
      translation = true;
1✔
266
      translator = true;
1✔
267
    }
268

269
    if (passed("240a")) {
1✔
270
      translation = true;
1✔
271
      originalTitle = true;
1✔
272
    }
273

274
    // TODO: maybe the value should check against other language code
275
    if (passed("240l")) {
1✔
276
      translation = true;
1✔
277
      originalTitle = true;
1✔
278
    }
279

280
    if (passed("765ind2")) {
1✔
NEW
281
      translation = true;
×
282
    }
283

284
    if (passed("765t")) {
1✔
NEW
285
      translation = true;
×
NEW
286
      originalTitle = true;
×
287
    }
288

289
    if (passed("765s")) {
1✔
NEW
290
      translation = true;
×
NEW
291
      originalTitle = true;
×
292
    }
293

294
    if (passed("765d")) {
1✔
NEW
295
      translation = true;
×
NEW
296
      originalPublication = true;
×
297
    }
298
  }
1✔
299

300
  /**
301
   * Check if the rule has been passed the test
302
   * @param path
303
   * @return
304
   */
305
  public boolean passed(String path) {
306
    return resultMap.containsKey(path) && resultMap.get(path).getStatus().equals(RuleCheckingOutputStatus.PASSED);
1✔
307
  }
308

309
  public boolean isTranslation() {
310
    return translation;
1✔
311
  }
312

313
  private static Map<String, String> toMap(List<String> input) {
314
    Map<String, String> map = new HashMap<>();
1✔
315
    for (int i = 0; i < input.size(); i+=2) {
1✔
316
      map.put(input.get(i), input.get(i+1));
1✔
317
    }
318
    return map;
1✔
319
  }
320
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc