• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #24096

18 Dec 2024 08:44PM UTC coverage: 22.583% (+0.01%) from 22.573%
#24096

Pull #10790

github

web-flow
Merge 0d75b881b into ed391eb99
Pull Request #10790: fix: issues in exporters and citations for PermaLink/non-DOI PIDs

48 of 69 new or added lines in 7 files covered. (69.57%)

1 existing line in 1 file now uncovered.

19560 of 86614 relevant lines covered (22.58%)

0.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

61.64
/src/main/java/edu/harvard/iq/dataverse/DataCitation.java
1
/*
2
 * To change this license header, choose License Headers in Project Properties.
3
 * To change this template file, choose Tools | Templates
4
 * and open the template in the editor.
5
 */
6
package edu.harvard.iq.dataverse;
7

8
import edu.harvard.iq.dataverse.branding.BrandingUtil;
9
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
10
import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider;
11

12
import java.io.BufferedWriter;
13
import java.io.ByteArrayOutputStream;
14
import java.io.IOException;
15
import java.io.OutputStream;
16
import java.io.OutputStreamWriter;
17
import java.io.Writer;
18
import java.nio.charset.StandardCharsets;
19
import java.text.SimpleDateFormat;
20
import java.util.ArrayList;
21
import java.util.Date;
22
import java.util.HashMap;
23
import java.util.List;
24
import java.util.Map;
25
import java.util.logging.Level;
26
import java.util.logging.Logger;
27
import java.util.regex.Matcher;
28
import java.util.regex.Pattern;
29
import java.util.stream.Collectors;
30

31
import jakarta.ejb.EJBException;
32
import javax.xml.stream.XMLOutputFactory;
33
import javax.xml.stream.XMLStreamException;
34
import javax.xml.stream.XMLStreamWriter;
35

36
import edu.harvard.iq.dataverse.util.BundleUtil;
37
import edu.harvard.iq.dataverse.util.DateUtil;
38
import org.apache.commons.text.StringEscapeUtils;
39
import org.apache.commons.lang3.StringUtils;
40

41
import static edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider.DOI_PROTOCOL;
42
import static edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider.HDL_PROTOCOL;
43
import static edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider.PERMA_PROTOCOL;
44

45
/**
46
 *
47
 * @author gdurand, qqmyers
48
 */
49
public class DataCitation {
50

51
    private static final Logger logger = Logger.getLogger(DataCitation.class.getCanonicalName());
1✔
52

53
    private List<String> authors = new ArrayList<String>();
1✔
54
    private List<String> producers = new ArrayList<String>();
1✔
55
    private String title;
56
    private String fileTitle = null;
1✔
57
    private String year;
58
    private Date date;
59
    private GlobalId persistentId;
60
    private String version;
61
    private String UNF = null;
1✔
62
    private String publisher;
63
    private boolean direct;
64
    private List<String> funders;
65
    private List<String> seriesTitles;
66
    private String description;
67
    private List<String> datesOfCollection;
68
    private List<String> keywords;
69
    private List<String> kindsOfData;
70
    private List<String> languages;
71
    private List<String> spatialCoverages;
72

73
    private List<DatasetField> optionalValues = new ArrayList<>();
1✔
74
    private int optionalURLcount = 0; 
1✔
75

76
    public DataCitation(DatasetVersion dsv) {
77
        this(dsv, false);
1✔
78
    }
1✔
79

80

81
    public DataCitation(DatasetVersion dsv, boolean direct) {
1✔
82
        this.direct = direct;
1✔
83
        getCommonValuesFrom(dsv);
1✔
84

85
        // The Global Identifier: 
86
        // It is always part of the citation for the local datasets; 
87
        // And for *some* harvested datasets. 
88
        persistentId = getPIDFrom(dsv, dsv.getDataset());
1✔
89

90
        // UNF
91
        UNF = dsv.getUNF();
1✔
92

93
        // optional values
94
        for (DatasetFieldType dsfType : dsv.getDataset().getOwner().getCitationDatasetFieldTypes()) {
1✔
95
            DatasetField dsf = dsv.getDatasetField(dsfType);
×
96
            if (dsf != null) {
×
97
                optionalValues.add(dsf);
×
98
                
99
                if (dsf.getDatasetFieldType().getFieldType().equals(DatasetFieldType.FieldType.URL)) {
×
100
                    optionalURLcount++;
×
101
                }
102
            }
103
        }
×
104
    }
1✔
105
    
106
    public DataCitation(FileMetadata fm) {
107
        this(fm, false);
×
108
    }
×
109

110
    public DataCitation(FileMetadata fm, boolean direct) {
1✔
111
        this.direct = direct;
1✔
112
        DatasetVersion dsv = fm.getDatasetVersion();
1✔
113

114
        getCommonValuesFrom(dsv);
1✔
115

116
        // file Title for direct File citation
117
        fileTitle = fm.getLabel();
1✔
118
        DataFile df = fm.getDataFile();
1✔
119

120
        // File description
121
        description = fm.getDescription();
1✔
122

123
        // The Global Identifier of the Datafile (if published and isDirect==true) or Dataset as appropriate
124
        persistentId = getPIDFrom(dsv, df);
1✔
125

126
        // UNF
127
        if (df.isTabularData() && df.getUnf() != null && !df.getUnf().isEmpty()) {
1✔
128
            UNF = df.getUnf();
×
129
        }
130
    }
1✔
131

132
    private void getCommonValuesFrom(DatasetVersion dsv) {
133

134
        getAuthorsAndProducersFrom(dsv);
1✔
135
        funders = dsv.getUniqueGrantAgencyValues();
1✔
136
        kindsOfData = dsv.getKindOfData();
1✔
137
        // publication year
138
        date = getDateFrom(dsv);
1✔
139
        year = new SimpleDateFormat("yyyy").format(date);
1✔
140

141
        datesOfCollection = dsv.getDatesOfCollection();
1✔
142
        title = dsv.getTitle();
1✔
143
        seriesTitles = dsv.getSeriesTitles();
1✔
144
        keywords = dsv.getKeywords();
1✔
145
        languages = dsv.getLanguages();
1✔
146
        spatialCoverages = dsv.getSpatialCoverages();
1✔
147
        publisher = getPublisherFrom(dsv);
1✔
148
        version = getVersionFrom(dsv);
1✔
149
    }
1✔
150

151
    public String getAuthorsString() {
152
        return String.join("; ", authors);
1✔
153
    }
154

155
    public String getTitle() {
156
        return title;
1✔
157
    }
158

159
    public String getFileTitle() {
160
        return fileTitle;
1✔
161
    }
162

163
    public boolean isDirect() {
164
        return direct;
1✔
165
    }
166

167
    
168
    public String getYear() {
169
        return year;
1✔
170
    }
171

172
    public GlobalId getPersistentId() {
173
        return persistentId;
1✔
174
    }
175

176
    public String getVersion() {
177
        return version;
1✔
178
    }
179

180
    public String getUNF() {
181
        return UNF;
1✔
182
    }
183

184
    public String getPublisher() {
185
        return publisher;
1✔
186
    }
187

188
    @Override
189
    public String toString() {
190
        return toString(false);
1✔
191
    }
192

193
    public String toString(boolean html) {
194
        return toString(html, false);
1✔
195
    }
196
    public String toString(boolean html, boolean anonymized) {
197
        // first add comma separated parts
198
        String separator = ", ";
1✔
199
        List<String> citationList = new ArrayList<>();
1✔
200
        if(anonymized) {
1✔
201
            citationList.add(BundleUtil.getStringFromBundle("file.anonymized.authorsWithheld"));
×
202
        } else {
203
            citationList.add(formatString(getAuthorsString(), html));
1✔
204
        }
205
        citationList.add(year);
1✔
206
        if ((fileTitle != null) && isDirect()) {
1✔
207
            citationList.add(formatString(fileTitle, html, "\""));
1✔
208
            citationList.add(formatString(title, html, "<em>", "</em>"));
1✔
209
        } else {
210
        citationList.add(formatString(title, html, "\""));
1✔
211
        }
212

213
        if (persistentId != null) {
1✔
214
                // always show url format
215
            citationList.add(formatURL(persistentId.asURL(), persistentId.asURL(), html)); 
1✔
216
        }
217
        citationList.add(formatString(publisher, html));
1✔
218
        citationList.add(version);
1✔
219

220
        StringBuilder citation = new StringBuilder(citationList.stream().filter(value -> !StringUtils.isEmpty(value))
1✔
221
                .collect(Collectors.joining(separator)));
1✔
222

223
        if ((fileTitle != null) && !isDirect()) {
1✔
224
            citation.append("; " + formatString(fileTitle, html, "") + " [fileName]");
1✔
225
        }
226
        // append UNF
227
        if (!StringUtils.isEmpty(UNF)) {
1✔
228
            citation.append(separator).append(UNF).append(" [fileUNF]");
×
229
        }
230

231
        for (DatasetField dsf : optionalValues) {
1✔
232
            String displayName = dsf.getDatasetFieldType().getDisplayName();
×
233
            String displayValue;
234
            
235
            if (dsf.getDatasetFieldType().getFieldType().equals(DatasetFieldType.FieldType.URL)) {
×
236
                displayValue = formatURL(dsf.getDisplayValue(), dsf.getDisplayValue(), html);
×
237
                if (optionalURLcount == 1) {
×
238
                    displayName = "URL";
×
239
                }
240
            } else {
241
                displayValue = formatString(dsf.getDisplayValue(), html);
×
242
            }
243
            citation.append(" [").append(displayName).append(": ").append(displayValue).append("]");
×
244
        }
×
245
        return citation.toString();
1✔
246
    }
247

248
    public String toBibtexString() {
249
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1✔
250
        try {
251
            writeAsBibtexCitation(buffer);
1✔
252
        } catch (IOException e) {
×
253
            e.printStackTrace();
×
254
        }
1✔
255
        //Use UTF-8?
256
        return buffer.toString();
1✔
257
    }
258
    
259
    public void writeAsBibtexCitation(OutputStream os) throws IOException {
260
        // Use UTF-8
261
        Writer out = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8));
1✔
262
        if(getFileTitle() !=null && isDirect()) {
1✔
263
            out.write("@incollection{");
×
264
        } else {
265
            out.write("@data{");
1✔
266
        }
267
        out.write(persistentId.getIdentifier() + "_" + year + "," + "\r\n");
1✔
268
        out.write("author = {");
1✔
269
        out.write(String.join(" and ", authors));
1✔
270
        out.write("},\r\n");
1✔
271
        out.write("publisher = {");
1✔
272
        out.write(publisher);
1✔
273
        out.write("},\r\n");
1✔
274
        if(getFileTitle() !=null && isDirect()) {
1✔
275
            out.write("title = {");
×
276
            out.write(fileTitle);
×
277
            out.write("},\r\n");
×
278
            out.write("booktitle = {");
×
279
            out.write(title);
×
280
            out.write("},\r\n");
×
281
        } else {
282
            out.write("title = {{");
1✔
283
            String doubleQ = "\"";
1✔
284
            String doubleTick = "``";
1✔
285
            String doubleAp = "''";
1✔
286
            out.write(title.replaceFirst(doubleQ, doubleTick).replaceFirst(doubleQ, doubleAp));
1✔
287
            out.write("}},\r\n");
1✔
288
        }
289
        if(UNF != null){
1✔
290
            out.write("UNF = {");
×
291
            out.write(UNF);
×
292
            out.write("},\r\n");
×
293
        }
294
        out.write("year = {");
1✔
295
        out.write(year);
1✔
296
        out.write("},\r\n");
1✔
297
        out.write("version = {");
1✔
298
        out.write(version);
1✔
299
        out.write("},\r\n");
1✔
300
        if("doi".equals(persistentId.getProtocol())) {
1✔
301
            out.write("doi = {");
1✔
302
            out.write(persistentId.getAuthority());
1✔
303
            out.write("/");
1✔
304
            out.write(persistentId.getIdentifier());
1✔
305
            out.write("},\r\n");
1✔
306
        }
307
        out.write("url = {");
1✔
308
        out.write(persistentId.asURL());
1✔
309
        out.write("}\r\n");
1✔
310
        out.write("}\r\n");
1✔
311
        out.flush();
1✔
312
    }
1✔
313

314
    public String toRISString() {
315
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1✔
316
        try {
317
            writeAsRISCitation(buffer);
1✔
318
        } catch (IOException e) {
×
319
            e.printStackTrace();
×
320
        }
1✔
321
        //Use UTF-8?
322
        return buffer.toString();
1✔
323
    }
324

325
    public void writeAsRISCitation(OutputStream os) throws IOException {
326
        // Use UTF-8
327
        Writer out = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8));
1✔
328
        out.write("Provider: " + publisher + "\r\n");
1✔
329
        out.write("Content: text/plain; charset=\"utf-8\"" + "\r\n");
1✔
330
        // Using type "DATA" - see https://github.com/IQSS/dataverse/issues/4816
331
        
332
        if ((getFileTitle()!=null)&&isDirect()) {
1✔
333
            out.write("TY  - DATA" + "\r\n");
×
334
            out.write("T1  - " + getFileTitle() + "\r\n");
×
335
            out.write("T2  - " + getTitle() + "\r\n");
×
336
        } else {
337
            out.write("TY  - DATA" + "\r\n");
1✔
338
            out.write("T1  - " + getTitle() + "\r\n");
1✔
339
        }
340
        if (seriesTitles != null) {
1✔
341
            for (String seriesTitle : seriesTitles) {
×
342
                out.write("T3  - " + seriesTitle + "\r\n");
×
343
            }
×
344
        }
345
        /* Removing abstract/description per Request from G. King in #3759
346
        if(description!=null) {
347
            out.write("AB  - " + flattenHtml(description) + "\r\n");
348
        } */
349
        for (String author : authors) {
1✔
350
            out.write("AU  - " + author + "\r\n");
1✔
351
        }
1✔
352
        
353
        if (!producers.isEmpty()) {
1✔
354
            for (String author : producers) {
×
355
                out.write("A2  - " + author + "\r\n");
×
356
            }
×
357
        }
358
        if (!funders.isEmpty()) {
1✔
359
            for (String author : funders) {
×
360
                out.write("A4  - " + author + "\r\n");
×
361
            }
×
362
        }
363
        if (!kindsOfData.isEmpty()) {
1✔
364
            for (String kod : kindsOfData) {
×
365
                out.write("C3  - " + kod + "\r\n");
×
366
            }
×
367
        }    
368
        if (!datesOfCollection.isEmpty()) {
1✔
369
            for (String dateRange : datesOfCollection) {
×
370
                out.write("DA  - " + dateRange + "\r\n");
×
371
            }
×
372
        }
373

374
        if (persistentId != null) {
1✔
375
            out.write("DO  - " + persistentId.toString() + "\r\n");
1✔
376
        }
377
        out.write("ET  - " + version + "\r\n");
1✔
378
        if (!keywords.isEmpty()) {
1✔
379
            for (String keyword : keywords) {
×
380
                out.write("KW  - " + keyword + "\r\n");
×
381
            }
×
382
        }
383
        if (!languages.isEmpty()) {
1✔
384
            for (String lang : languages) {
×
385
                out.write("LA  - " + lang + "\r\n");
×
386
            }
×
387
        }
388

389
        out.write("PY  - " + year + "\r\n");
1✔
390
        
391
        if (!spatialCoverages.isEmpty()) {
1✔
392
            for (String coverage : spatialCoverages) {
×
393
                out.write("RI  - " + coverage + "\r\n");
×
394
            }
×
395
        }
396
        
397
        out.write("SE  - " + date + "\r\n");
1✔
398

399
        out.write("UR  - " + persistentId.asURL() + "\r\n");
1✔
400
        out.write("PB  - " + publisher + "\r\n");
1✔
401

402
        // a DataFile citation also includes filename und UNF, if applicable:
403
        if (getFileTitle() != null) {
1✔
404
            if(!isDirect()) {
×
405
                out.write("C1  - " + getFileTitle() + "\r\n");
×
406
            }
407
            if (getUNF() != null) {
×
408
                out.write("C2  - " + getUNF() + "\r\n");
×
409
            }
410
        }
411
        // closing element:
412
        out.write("ER  - \r\n");
1✔
413
        out.flush();
1✔
414
    }
1✔
415

416
    private XMLOutputFactory xmlOutputFactory = null;
1✔
417

418
    public String toEndNoteString() {
419
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
1✔
420
        writeAsEndNoteCitation(outStream);
1✔
421
        String xml = outStream.toString();
1✔
422
        return xml; 
1✔
423
    } 
424
    
425
    public void writeAsEndNoteCitation(OutputStream os) {
426

427
        xmlOutputFactory = javax.xml.stream.XMLOutputFactory.newInstance();
1✔
428
        XMLStreamWriter xmlw = null;
1✔
429
        try {
430
            xmlw = xmlOutputFactory.createXMLStreamWriter(os);
1✔
431
            xmlw.writeStartDocument();
1✔
432
            createEndNoteXML(xmlw);
1✔
433
            xmlw.writeEndDocument();
1✔
434
        } catch (XMLStreamException ex) {
×
435
            Logger.getLogger("global").log(Level.SEVERE, null, ex);
×
436
            throw new EJBException("ERROR occurred during creating endnote xml.", ex);
×
437
        } finally {
438
            try {
439
                if (xmlw != null) {
1✔
440
                    xmlw.close();
1✔
441
                }
442
            } catch (XMLStreamException ex) {
×
443
            }
1✔
444
        }
445
    }
1✔
446
    
447
    private void createEndNoteXML(XMLStreamWriter xmlw) throws XMLStreamException {
448

449
        xmlw.writeStartElement("xml");
1✔
450
        xmlw.writeStartElement("records");
1✔
451

452
        xmlw.writeStartElement("record");
1✔
453

454
        // "Ref-type" indicates which of the (numerous!) available EndNote
455
        // schemas this record will be interpreted as. 
456
        // This is relatively important. Certain fields with generic 
457
        // names like "custom1" and "custom2" become very specific things
458
        // in specific schemas; for example, custom1 shows as "legal notice"
459
        // in "Journal Article" (ref-type 84), or as "year published" in 
460
        // "Government Document". 
461
        // We don't want the UNF to show as a "legal notice"! 
462
        // We have found a ref-type that works ok for our purposes - 
463
        // "Dataset" (type 59). In this one, the fields Custom1
464
        // and Custom2 are not translated and just show as is. 
465
        // And "Custom1" still beats "legal notice". 
466
        // -- L.A. 12.12.2014 beta 10
467
        // and see https://github.com/IQSS/dataverse/issues/4816
468
        
469
        xmlw.writeStartElement("ref-type");
1✔
470
        xmlw.writeAttribute("name", "Dataset");
1✔
471
        xmlw.writeCharacters("59");
1✔
472
        xmlw.writeEndElement(); // ref-type
1✔
473

474
        xmlw.writeStartElement("contributors");
1✔
475
        if (!authors.isEmpty()) {
1✔
476
        xmlw.writeStartElement("authors");
1✔
477
        for (String author : authors) {
1✔
478
            xmlw.writeStartElement("author");
1✔
479
            xmlw.writeCharacters(author);
1✔
480
            xmlw.writeEndElement(); // author                    
1✔
481
        }
1✔
482
        xmlw.writeEndElement(); // authors 
1✔
483
        }
484
        if (!producers.isEmpty()) {
1✔
485
            xmlw.writeStartElement("secondary-authors");
×
486
            for (String producer : producers) {
×
487
                xmlw.writeStartElement("author");
×
488
                xmlw.writeCharacters(producer);
×
489
                xmlw.writeEndElement(); // author
×
490
            }
×
491
            xmlw.writeEndElement(); // secondary-authors
×
492
        }
493
        if (!funders.isEmpty()) {
1✔
494
            xmlw.writeStartElement("subsidiary-authors");
×
495
            for (String funder : funders) {
×
496
                xmlw.writeStartElement("author");
×
497
                xmlw.writeCharacters(funder);
×
498
                xmlw.writeEndElement(); // author
×
499
            }
×
500
            xmlw.writeEndElement(); // subsidiary-authors
×
501
        }
502
        xmlw.writeEndElement(); // contributors 
1✔
503

504
        xmlw.writeStartElement("titles");
1✔
505
        if ((fileTitle != null) && isDirect()) {
1✔
506
            xmlw.writeStartElement("title");
×
507
            xmlw.writeCharacters(fileTitle);
×
508
            xmlw.writeEndElement(); // title
×
509
            xmlw.writeStartElement("secondary-title");
×
510
            xmlw.writeCharacters(title);
×
511
            xmlw.writeEndElement(); // secondary-title
×
512
        } else {
513
        xmlw.writeStartElement("title");
1✔
514
        xmlw.writeCharacters(title);
1✔
515
        xmlw.writeEndElement(); // title
1✔
516
        }
517

518
        /*
519
        If I say just !"isEmpty" for series titles I get a failure 
520
        on testToEndNoteString_withoutTitleAndAuthor
521
        with a null pointer on build -SEK 3/31/23
522
        */
523
        if (seriesTitles != null && !seriesTitles.isEmpty() ) {
1✔
524
            xmlw.writeStartElement("tertiary-titles");
×
525
            for (String seriesTitle : seriesTitles){
×
526
                xmlw.writeStartElement("tertiary-title");
×
527
                xmlw.writeCharacters(seriesTitle);
×
528
                xmlw.writeEndElement(); // tertiary-title
×
529
            }
×
530
            xmlw.writeEndElement(); // tertiary-title
×
531
        }
532
        
533
        xmlw.writeEndElement(); // titles
1✔
534

535
        xmlw.writeStartElement("section");
1✔
536
        String sectionString;
537
        sectionString = new SimpleDateFormat("yyyy-MM-dd").format(date);
1✔
538

539
        xmlw.writeCharacters(sectionString);
1✔
540
        xmlw.writeEndElement(); // section
1✔
541
/* Removing abstract/description per Request from G. King in #3759
542
        xmlw.writeStartElement("abstract");
543
        if(description!=null) {
544
            xmlw.writeCharacters(flattenHtml(description));
545
        }
546
        xmlw.writeEndElement(); // abstract
547
         */
548

549
        xmlw.writeStartElement("dates");
1✔
550
        xmlw.writeStartElement("year");
1✔
551
        xmlw.writeCharacters(year);
1✔
552
        xmlw.writeEndElement(); // year
1✔
553
        if (!datesOfCollection.isEmpty()) {
1✔
554
            xmlw.writeStartElement("pub-dates");
×
555
            for (String dateRange : datesOfCollection) {
×
556
                xmlw.writeStartElement("date");
×
557
                xmlw.writeCharacters(dateRange);
×
558
                xmlw.writeEndElement(); // date
×
559
            }
×
560
            xmlw.writeEndElement(); // pub-dates
×
561
        }
562
        xmlw.writeEndElement(); // dates
1✔
563

564
        xmlw.writeStartElement("edition");
1✔
565
        xmlw.writeCharacters(version);
1✔
566
        xmlw.writeEndElement(); // edition
1✔
567

568
        if (!keywords.isEmpty()) {
1✔
569
            xmlw.writeStartElement("keywords");
×
570
            for (String keyword : keywords) {
×
571
                xmlw.writeStartElement("keyword");
×
572
                xmlw.writeCharacters(keyword);
×
573
                xmlw.writeEndElement(); // keyword
×
574
            }
×
575
            xmlw.writeEndElement(); // keywords
×
576
        }
577
        if (!kindsOfData.isEmpty()) {
1✔
578
            for (String kod : kindsOfData) {
×
579
                xmlw.writeStartElement("custom3");
×
580
                xmlw.writeCharacters(kod);
×
581
                xmlw.writeEndElement(); // custom3
×
582
            }
×
583
        }
584
        if (!languages.isEmpty()) {
1✔
585
            for (String lang : languages) {
×
586
                xmlw.writeStartElement("language");
×
587
                xmlw.writeCharacters(lang);
×
588
                xmlw.writeEndElement(); // language
×
589
            }
×
590
        }
591
        xmlw.writeStartElement("publisher");
1✔
592
        xmlw.writeCharacters(publisher);
1✔
593
        xmlw.writeEndElement(); // publisher
1✔
594

595
        if (!spatialCoverages.isEmpty()) {
1✔
596
            for (String coverage : spatialCoverages) {
×
597
                xmlw.writeStartElement("reviewed-item");
×
598
                xmlw.writeCharacters(coverage);
×
599
                xmlw.writeEndElement(); // reviewed-item
×
600
            }
×
601
        }
602

603
        xmlw.writeStartElement("urls");
1✔
604
        if (persistentId != null) {
1✔
605
            if (PERMA_PROTOCOL.equals(persistentId.getProtocol()) || HDL_PROTOCOL.equals(persistentId.getProtocol())) {
1✔
NEW
606
                xmlw.writeStartElement("web-urls");
×
NEW
607
                xmlw.writeStartElement("url");
×
NEW
608
                xmlw.writeCharacters(getPersistentId().asURL());
×
NEW
609
                xmlw.writeEndElement(); // url
×
NEW
610
                xmlw.writeEndElement(); // web-urls
×
611
            } else if (DOI_PROTOCOL.equals(persistentId.getProtocol())) {
1✔
612
                xmlw.writeStartElement("related-urls");
1✔
613
                xmlw.writeStartElement("url");
1✔
614
                xmlw.writeCharacters(getPersistentId().asURL());
1✔
615
                xmlw.writeEndElement(); // url
1✔
616
                xmlw.writeEndElement(); // related-urls
1✔
617
            }
618
        }
619
        xmlw.writeEndElement(); // urls
1✔
620
        
621
        // a DataFile citation also includes the filename and (for Tabular
622
        // files) the UNF signature, that we put into the custom1 and custom2 
623
        // fields respectively:
624
        
625
        if (getFileTitle() != null) {
1✔
626
            xmlw.writeStartElement("custom1");
×
627
            xmlw.writeCharacters(fileTitle);
×
628
            xmlw.writeEndElement(); // custom1
×
629
            
630
                if (getUNF() != null) {
×
631
                    xmlw.writeStartElement("custom2");
×
632
                    xmlw.writeCharacters(getUNF());
×
633
                    xmlw.writeEndElement(); // custom2
×
634
            }
635
        }
636
        if (persistentId != null && "doi".equals(persistentId.getProtocol())) {
1✔
637
            xmlw.writeStartElement("electronic-resource-num");
1✔
638
            String electResourceNum = persistentId.asRawIdentifier();
1✔
639
            xmlw.writeCharacters(electResourceNum);
1✔
640
            xmlw.writeEndElement();
1✔
641
        }
642
        //<electronic-resource-num>10.3886/ICPSR03259.v1</electronic-resource-num>                  
643
        xmlw.writeEndElement(); // record
1✔
644

645
        xmlw.writeEndElement(); // records
1✔
646
        xmlw.writeEndElement(); // xml
1✔
647

648
    }
1✔
649

650
        public Map<String, String> getDataCiteMetadata() {
651
        Map<String, String> metadata = new HashMap<>();
1✔
652
        String authorString = getAuthorsString();
1✔
653

654
        if (authorString.isEmpty()) {
1✔
655
            authorString = AbstractPidProvider.UNAVAILABLE;
×
656
    }
657
        String producerString = getPublisher();
1✔
658

659
        if (producerString.isEmpty()) {
1✔
660
            producerString =  AbstractPidProvider.UNAVAILABLE;
×
661
        }
662

663
        metadata.put("datacite.creator", authorString);
1✔
664
        metadata.put("datacite.title", getTitle());
1✔
665
        metadata.put("datacite.publisher", producerString);
1✔
666
        metadata.put("datacite.publicationyear", getYear());
1✔
667
        return metadata;
1✔
668
        }
669

670
        
671
    // helper methods   
672
    private String formatString(String value, boolean escapeHtml) {
673
        return formatString(value, escapeHtml, "");
1✔
674
    }
675

676
    private String formatString(String value, boolean escapeHtml, String wrapperFront) {
677
        return formatString(value, escapeHtml, wrapperFront, wrapperFront);
1✔
678
    }
679

680
    private String formatString(String value, boolean escapeHtml, String wrapperStart, String wrapperEnd) {
681
        if (!StringUtils.isEmpty(value)) {
1✔
682
            return new StringBuilder(wrapperStart).append(escapeHtml ? StringEscapeUtils.escapeHtml4(value) : value)
1✔
683
                    .append(wrapperEnd).toString();
1✔
684
        }
685
        return null;
1✔
686
    }
687

688
    private String formatURL(String text, String url, boolean html) {
689
        if (text == null) {
1✔
690
            return null;
×
691
        }
692

693
        if (html && url != null) {
1✔
694
            return "<a href=\"" + url + "\" target=\"_blank\">" + StringEscapeUtils.escapeHtml4(text) + "</a>";
1✔
695
        } else {
696
            return text;
1✔
697
        }
698
    }
699

700
    /** This method flattens html for the textual export formats.
701
     * It removes <b> and <i> tags, replaces <br>, <p> and headers <hX> with 
702
     * line breaks, converts lists to form where items start with an indented '*  ',
703
     * and converts links to simple text showing the label and, if different, 
704
     * the url in parenthesis after it. Since these operations may create
705
     * multiple line breaks, a final step limits the changes and compacts multiple 
706
     * line breaks into one.  
707
     *
708
     * @param html input string
709
     * @return the flattened text output
710
     */
711
    private String flattenHtml(String html) {
712
        html = html.replaceAll("<[pP]>", "\r\n");
×
713
        html = html.replaceAll("<\\/[pP]>", "\r\n");
×
714
        html = html.replaceAll("<[hH]\\d>", "\r\n");
×
715
        html = html.replaceAll("<\\/[hH]\\d>", "\r\n");
×
716
        html = html.replaceAll("<[\\/]?[bB]>", "");
×
717
        html = html.replaceAll("<[\\/]?[iI]>", "\r\n");
×
718
        
719
        html = html.replaceAll("<[bB][rR][\\/]?>", "\r\n");
×
720
        html = html.replaceAll("<[uU][lL]>", "\r\n");
×
721
        html = html.replaceAll("<\\/[uU][lL]>", "\r\n");
×
722
        html = html.replaceAll("<[lL][iI]>", "\t*  ");
×
723
        html = html.replaceAll("<\\/[lL][iI]>", "\r\n");
×
724
        Pattern p = Pattern.compile("<a\\W+href=\\\"(.*?)\\\".*?>(.*?)<\\/a>");
×
725
        Matcher m = p.matcher(html);
×
726
        String url = null;
×
727
        String label = null;
×
728
        while(m.find()) {
×
729
            url = m.group(1); // this variable should contain the link URL
×
730
            label = m.group(2); // this variable should contain the label
×
731
            //display either the label or label(url)
732
            if(!url.equals(label)) {
×
733
                label = label + "(" + url +")";
×
734
            }
735
            html = html.replaceFirst("<a\\W+href=\\\"(.*?)\\\".*?>(.*?)<\\/a>", label);
×
736
        }
737
        //Note, this does not affect single '\n' chars originally in the text
738
        html=html.replaceAll("(\\r\\n?)+", "\r\n");
×
739
        
740
        return html;
×
741
    }
742

743
    private Date getDateFrom(DatasetVersion dsv) {
744
        Date citationDate = null;
1✔
745

746
        if (dsv.getDataset().isHarvested()) {
1✔
747
            citationDate = DateUtil.parseDate(dsv.getProductionDate());
×
748
            if (citationDate == null) {
×
749
                citationDate = DateUtil.parseDate(dsv.getDistributionDate());
×
750
            }
751
        }
752

753
        if (citationDate == null) {
1✔
754
            if (dsv.getCitationDate() != null) {
1✔
755
                citationDate = dsv.getCitationDate();
×
756
            } else if (dsv.getDataset().getCitationDate() != null) {
1✔
757
                citationDate = dsv.getDataset().getCitationDate();
1✔
758
            } else { // for drafts
759
                citationDate = dsv.getLastUpdateTime();
1✔
760
            }
761
        }
762

763
        if (citationDate == null) {
1✔
764
            //As a last resort, pick the current date
765
            logger.warning("Unable to find citation date for datasetversion: " + dsv.getId());
1✔
766
            citationDate = new Date();
1✔
767
        }
768
        return citationDate;
1✔
769
    }
770

771
    private void getAuthorsAndProducersFrom(DatasetVersion dsv) {
772

773
        dsv.getDatasetAuthors().stream().forEach((author) -> {
1✔
774
            if (!author.isEmpty()) {
1✔
775
                String an = author.getName().getDisplayValue().trim();
1✔
776
                authors.add(an);
1✔
777
            }
778
        });
1✔
779
        producers = dsv.getDatasetProducerNames();
1✔
780
    }
1✔
781

782
    private String getPublisherFrom(DatasetVersion dsv) {
783
        if (!dsv.getDataset().isHarvested()) {
1✔
784
            return BrandingUtil.getInstallationBrandName();
1✔
785
        } else {
786
            return dsv.getDistributorName();
×
787
            // remove += [distributor] SEK 8-18-2016
788
        }
789
    }
790

791
    private String getVersionFrom(DatasetVersion dsv) {
792
        String version = "";
1✔
793
        if (!dsv.getDataset().isHarvested()) {
1✔
794
            if (dsv.isDraft()) {
1✔
795
                version = BundleUtil.getStringFromBundle("draftversion");
1✔
796
            } else if (dsv.getVersionNumber() != null) {
1✔
797
                version = "V" + dsv.getVersionNumber();
1✔
798
                if (dsv.isDeaccessioned()) {
1✔
799
                    version += ", "+ BundleUtil.getStringFromBundle("deaccessionedversion");
×
800
                }
801
            }
802
        }
803
        return version;
1✔
804
    }
805

806
    private GlobalId getPIDFrom(DatasetVersion dsv, DvObject dv) {
807
        if (!dsv.getDataset().isHarvested()
1✔
808
                || HarvestingClient.HARVEST_STYLE_VDC.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle())
×
809
                || HarvestingClient.HARVEST_STYLE_ICPSR.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle())
×
810
                || HarvestingClient.HARVEST_STYLE_DEFAULT.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle())
×
811
                || HarvestingClient.HARVEST_STYLE_DATAVERSE
812
                        .equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle())) {
×
813
                if(!isDirect()) {
1✔
814
                if (!StringUtils.isEmpty(dsv.getDataset().getIdentifier())) {
1✔
815
                    return dsv.getDataset().getGlobalId();
1✔
816
                }
817
                } else {
818
                if (!StringUtils.isEmpty(dv.getIdentifier())) {
1✔
819
                    return dv.getGlobalId();
1✔
820
                }
821
            }
822
        }
823
        return null;
×
824
    }
825
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc