• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #22693

03 Jul 2024 01:09PM CUT coverage: 20.626% (-0.09%) from 20.716%
#22693

push

github

web-flow
Merge pull request #10664 from IQSS/develop

merge develop into master for 6.3

195 of 1852 new or added lines in 82 files covered. (10.53%)

72 existing lines in 33 files now uncovered.

17335 of 84043 relevant lines covered (20.63%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

57.65
/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java
1
package edu.harvard.iq.dataverse.sitemap;
2

3
import java.io.File;
4
import java.io.IOException;
5
import java.net.MalformedURLException;
6
import java.nio.file.Files;
7
import java.nio.file.Path;
8
import java.nio.file.Paths;
9
import java.text.ParseException;
10
import java.time.format.DateTimeFormatter;
11
import java.util.List;
12
import java.util.logging.Logger;
13

14
import com.redfin.sitemapgenerator.W3CDateFormat;
15
import com.redfin.sitemapgenerator.W3CDateFormat.Pattern;
16
import com.redfin.sitemapgenerator.WebSitemapGenerator;
17
import com.redfin.sitemapgenerator.WebSitemapUrl;
18

19
import edu.harvard.iq.dataverse.Dataset;
20
import edu.harvard.iq.dataverse.Dataverse;
21
import edu.harvard.iq.dataverse.DvObjectContainer;
22
import edu.harvard.iq.dataverse.settings.ConfigCheckService;
23
import edu.harvard.iq.dataverse.settings.JvmSettings;
24
import edu.harvard.iq.dataverse.util.SystemConfig;
25

26
public class SiteMapUtil {
×
27

28
    static final String DATE_PATTERN = "yyyy-MM-dd";
29
    static final String SITEMAP_FILENAME_STAGED = "sitemap.xml.staged";
30
    /** @see https://www.sitemaps.org/protocol.html#index */
31
    static final int SITEMAP_LIMIT = 50000;
32

33
    private static final Logger logger = Logger.getLogger(SiteMapUtil.class.getCanonicalName());
1✔
34
    private static DateTimeFormatter formatter = DateTimeFormatter.ofPattern(DATE_PATTERN);
1✔
35

36

37
    public static void updateSiteMap(List<Dataverse> dataverses, List<Dataset> datasets) {
38

39
        logger.info("BEGIN updateSiteMap");
1✔
40

41
        final String dataverseSiteUrl = SystemConfig.getDataverseSiteUrlStatic();
1✔
42
        final String msgErrorFormat = "Problem with %s : %s. The exception is %s";
1✔
43
        final String msgErrorW3CFormat = "%s isn't a valid W3C date time for %s. The exception is %s";
1✔
44
        final String sitemapPathString = getSitemapPathString();
1✔
45
        final String stagedSitemapPathAndFileString = sitemapPathString + File.separator + SITEMAP_FILENAME_STAGED;
1✔
46
        final Path stagedSitemapPath = Paths.get(stagedSitemapPathAndFileString);
1✔
47

48
        if (Files.exists(stagedSitemapPath)) {
1✔
NEW
49
            logger.warning(String.format(
×
50
                    "Unable to update sitemap! The staged file from a previous run already existed. Delete %s and try again.",
51
                    stagedSitemapPathAndFileString));
UNCOV
52
            return;
×
53
        }
54

55
        final File directory = new File(sitemapPathString);
1✔
56
        if (!directory.exists()) {
1✔
57
            directory.mkdir();
1✔
58
        }
59

60
        // Use DAY pattern (YYYY-MM-DD), local machine timezone
61
        final W3CDateFormat dateFormat = new W3CDateFormat(Pattern.DAY);
1✔
62
        WebSitemapGenerator wsg = null;
1✔
63
        try {
64
            // All sitemap files are in "sitemap" folder, see "getSitemapPathString" method.
65
            // But with pretty-faces configuration, "sitemap.xml" and "sitemap_index.xml" are accessible directly,
66
            // like "https://demo.dataverse.org/sitemap.xml". So "/sitemap/" need to be added on "WebSitemapGenerator"
67
            // in order to have valid URL for sitemap location.
68
            wsg = WebSitemapGenerator.builder(dataverseSiteUrl + "/sitemap/", directory).autoValidate(true).dateFormat(dateFormat)
1✔
69
                    .build();
1✔
NEW
70
        } catch (MalformedURLException e) {
×
NEW
71
            logger.warning(String.format(msgErrorFormat, "Dataverse site URL", dataverseSiteUrl, e.getLocalizedMessage()));
×
UNCOV
72
            return;
×
73
        }
1✔
74

75
        for (Dataverse dataverse : dataverses) {
1✔
76
            if (!dataverse.isReleased()) {
1✔
77
                continue;
×
78
            }
79
            final String dvAlias = dataverse.getAlias();
1✔
80
            final String dataverseUrl = dataverseSiteUrl + "/dataverse/" + dvAlias;
1✔
81
            final String lastModDate = getLastModDate(dataverse);
1✔
82
            try {
83
                final WebSitemapUrl url = new WebSitemapUrl.Options(dataverseUrl).lastMod(lastModDate).build();
1✔
84
                wsg.addUrl(url);
1✔
NEW
85
            } catch (MalformedURLException e) {
×
NEW
86
                logger.fine(String.format(msgErrorFormat, "dataverse URL", dataverseUrl, e.getLocalizedMessage()));
×
NEW
87
            } catch (ParseException e) {
×
NEW
88
                logger.fine(String.format(msgErrorW3CFormat, lastModDate, "dataverse alias " + dvAlias, e.getLocalizedMessage()));
×
89
            }
1✔
90
        }
1✔
91

92
        for (Dataset dataset : datasets) {
1✔
93
            // The deaccessioned check is last because it has to iterate through dataset versions.
94
            if (!dataset.isReleased() || dataset.isHarvested() || dataset.isDeaccessioned()) {
1✔
95
                continue;
1✔
96
            }
97
            final String datasetPid = dataset.getGlobalId().asString();
1✔
98
            final String datasetUrl = dataverseSiteUrl + "/dataset.xhtml?persistentId=" + datasetPid;
1✔
99
            final String lastModDate = getLastModDate(dataset);
1✔
100
            try {
101
                final WebSitemapUrl url = new WebSitemapUrl.Options(datasetUrl).lastMod(lastModDate).build();
1✔
102
                wsg.addUrl(url);
1✔
NEW
103
            } catch (MalformedURLException e) {
×
NEW
104
                logger.fine(String.format(msgErrorFormat, "dataset URL", datasetUrl, e.getLocalizedMessage()));
×
NEW
105
            } catch (ParseException e) {
×
NEW
106
                logger.fine(String.format(msgErrorW3CFormat, lastModDate, "dataset " + datasetPid, e.getLocalizedMessage()));
×
107
            }
1✔
108
        }
1✔
109

110
        logger.info(String.format("Writing and checking sitemap file into %s", sitemapPathString));
1✔
111
        try {
112
            wsg.write();
1✔
113
            if (dataverses.size() + datasets.size() > SITEMAP_LIMIT) {
1✔
114
                wsg.writeSitemapsWithIndex();
1✔
115
            }
116
        } catch (Exception ex) {
×
NEW
117
            final StringBuffer errorMsg = new StringBuffer("Unable to write or validate sitemap ! The exception is ");
×
NEW
118
            errorMsg.append(ex.getLocalizedMessage());
×
119
            // Add causes messages exception
NEW
120
            Throwable cause = ex.getCause();
×
121
            // Fix limit to 5 causes
NEW
122
            final int causeLimit = 5;
×
NEW
123
            int cpt = 0;
×
NEW
124
            while (cause != null && cpt < causeLimit) {
×
NEW
125
                errorMsg.append(" with cause ").append(cause.getLocalizedMessage());
×
NEW
126
                cause = ex.getCause();
×
NEW
127
                cpt = cpt + 1;
×
128
            }
NEW
129
            logger.warning(errorMsg.toString());
×
UNCOV
130
            return;
×
131
        }
1✔
132

133
        logger.info(String.format("Remove staged sitemap %s", stagedSitemapPathAndFileString));
1✔
134
        try {
135
            Files.deleteIfExists(stagedSitemapPath);
1✔
136
        } catch (IOException ex) {
×
NEW
137
            logger.warning("Unable to delete sitemap staged file! IOException: " + ex.getLocalizedMessage());
×
138
            return;
×
139
        }
1✔
140

141
        logger.info("END updateSiteMap");
1✔
142
    }
1✔
143

144
    private static String getLastModDate(DvObjectContainer dvObjectContainer) {
145
        // TODO: Decide if YYYY-MM-DD is enough. https://www.sitemaps.org/protocol.html
146
        // says "The date of last modification of the file. This date should be in W3C Datetime format.
147
        // This format allows you to omit the time portion, if desired, and use YYYY-MM-DD."
148
        return dvObjectContainer.getModificationTime().toLocalDateTime().format(formatter);
1✔
149
    }
150

151
    public static boolean stageFileExists() {
NEW
152
        String stagedSitemapPathAndFileString = getSitemapPathString() + File.separator + SITEMAP_FILENAME_STAGED;
×
153
        Path stagedPath = Paths.get(stagedSitemapPathAndFileString);
×
154
        if (Files.exists(stagedPath)) {
×
155
            logger.warning("Unable to update sitemap! The staged file from a previous run already existed. Delete " + stagedSitemapPathAndFileString + " and try again.");
×
156
            return true;
×
157
        }
158
        return false;
×
159
    }
160

161
    /**
162
     * Lookup the location where to generate the sitemap.
163
     *
164
     * Note: the location is checked to be configured, does exist and is writeable in
165
     * {@link ConfigCheckService#checkSystemDirectories()}
166
     *
167
     * @return Sitemap storage location ([docroot]/sitemap)
168
     */
169
    private static String getSitemapPathString() {
170
        return JvmSettings.DOCROOT_DIRECTORY.lookup() + File.separator + "sitemap";
1✔
171
    }
172

173
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc