• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #22929

15 Aug 2024 06:49PM CUT coverage: 20.791% (-0.009%) from 20.8%
#22929

push

github

web-flow
IQSS/7068 Reserve File Pids (#7334)

* file pid reservation

* add file pid reservation step to publish

(analogous to dataset pid register if needed)

Conflicts:
	src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java
	src/main/java/propertyFiles/Bundle.properties

* comment change

* check if file PIDs used once, use constants - per comments

* adding release note

* release notes, API doc update

* reflecting datasets and files for the PID endpoint

* removing release note about pre-reg for file PIDs as this is not supported

* file pid pre-reservation

Conflicts:
	src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java
	src/main/java/propertyFiles/Bundle.properties

* avoid problem when GlobalIDServiceBean implicitly merges

@kcondon sees a DB error persisting a file with null createtime during
the GlobalIDServiceBean.getBean call which uses a set namedQuery to find
the :DoiProvider. Create times for files are set above, but not merged
prior to calling registerFilePidsIfNeeded. Assuming the namedQuery is
forcing the file (without a merge) to persist which triggers the error.
In #7337, the code is reworked so there is a merge prior to
registerFilePidsIfNeeded. This commit adds one temporarily so this PR
works independently of the other.

* update theDataset

* noting that PID reservation can cause old timeouts to be too short

* more specifics

* release note update

* cleanup reformatting

* further cleanup

* set createTime earlier

---------

Co-authored-by: Danny Brooke <danny_brooke@harvard.edu>

0 of 54 new or added lines in 4 files covered. (0.0%)

3 existing lines in 1 file now uncovered.

17524 of 84285 relevant lines covered (20.79%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

25.74
/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java
1
package edu.harvard.iq.dataverse.engine.command.impl;
2

3
import edu.harvard.iq.dataverse.DataFile;
4
import edu.harvard.iq.dataverse.Dataset;
5
import edu.harvard.iq.dataverse.DatasetField;
6
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
7
import edu.harvard.iq.dataverse.DatasetVersion;
8
import edu.harvard.iq.dataverse.DatasetVersionDifference;
9
import edu.harvard.iq.dataverse.DatasetVersionUser;
10
import edu.harvard.iq.dataverse.Dataverse;
11
import edu.harvard.iq.dataverse.MetadataBlock;
12
import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
13
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
14
import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
15
import edu.harvard.iq.dataverse.engine.command.CommandContext;
16
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
17
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
18
import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException;
19
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
20
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
21
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
22
import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider;
23
import edu.harvard.iq.dataverse.util.BundleUtil;
24

25
import java.sql.Timestamp;
26
import java.util.Arrays;
27
import java.util.Date;
28
import java.util.Set;
29
import java.util.logging.Level;
30
import java.util.logging.Logger;
31
import static java.util.stream.Collectors.joining;
32

33
import jakarta.ejb.EJB;
34
import jakarta.validation.ConstraintViolation;
35
import edu.harvard.iq.dataverse.settings.JvmSettings;
36

37
/**
38
 *
39
 * Base class for commands that deal with {@code Dataset}s. Mainly here as a code
40
 * re-use mechanism.
41
 *
42
 * @author michael
43
 * @param <T> The type of the command's result. Normally {@link Dataset}.
44
 */
45
public abstract class AbstractDatasetCommand<T> extends AbstractCommand<T> {
46

47
    private static final Logger logger = Logger.getLogger(AbstractDatasetCommand.class.getName());
1✔
48
    private static final int FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT = 2 ^ 8;
49
    private Dataset dataset;
50
    private final Timestamp timestamp = new Timestamp(new Date().getTime());
1✔
51

52
    public AbstractDatasetCommand(DataverseRequest aRequest, Dataset aDataset, Dataverse parent) {
53
        super(aRequest, parent);
1✔
54
        if (aDataset == null) {
1✔
55
            throw new IllegalArgumentException("aDataset cannot be null");
1✔
56
        }
57
        dataset = aDataset;
×
58
    }
×
59

60
    public AbstractDatasetCommand(DataverseRequest aRequest, Dataset aDataset) {
61
        super(aRequest, aDataset);
1✔
62
        if (aDataset == null) {
1✔
63
            throw new IllegalArgumentException("aDataset cannot be null");
1✔
64
        }
65
        dataset = aDataset;
1✔
66
    }
1✔
67

68
    /**
69
     * Creates/updates the {@link DatasetVersionUser} for our {@link #dataset}. After
70
     * calling this method, there is a {@link DatasetUser} object connecting
71
     * {@link #dataset} and the {@link AuthenticatedUser} who issued this
72
     * command, with the {@code lastUpdate} field containing {@link #timestamp}.
73
     *
74
     * @param ctxt The command context in which this command runs.
75
     */
76
    protected void updateDatasetUser(CommandContext ctxt) {
77
        DatasetVersionUser datasetDataverseUser = ctxt.datasets().getDatasetVersionUser(getDataset().getLatestVersion(), getUser());
1✔
78

79
        if (datasetDataverseUser != null) {
1✔
80
            // Update existing dataset-user
81
            datasetDataverseUser.setLastUpdateDate(getTimestamp());
×
82
            ctxt.em().merge(datasetDataverseUser);
×
83

84
        } else {
85
            // create a new dataset-user
86
            createDatasetUser(ctxt);
1✔
87
        }
88
    }
1✔
89
    
90
    protected void createDatasetUser(CommandContext ctxt) {
91
        DatasetVersionUser datasetDataverseUser = new DatasetVersionUser();
1✔
92
        datasetDataverseUser.setDatasetVersion(getDataset().getLatestVersion());
1✔
93
        datasetDataverseUser.setLastUpdateDate(getTimestamp());
1✔
94
        datasetDataverseUser.setAuthenticatedUser((AuthenticatedUser) getUser());
1✔
95
        ctxt.em().persist(datasetDataverseUser);
1✔
96
    }
1✔
97
    
98
    /**
99
     * Validates the fields of the {@link DatasetVersion} passed. Throws an
100
     * informational error if validation fails.
101
     *
102
     * @param dsv The dataset version whose fields we validate
103
     * @param lenient when {@code true}, invalid fields are populated with N/A
104
     * value.
105
     * @throws CommandException if and only if {@code lenient=false}, and field
106
     * validation failed.
107
     */
108
    protected void validateOrDie(DatasetVersion dsv, Boolean lenient) throws CommandException {
109
        Set<ConstraintViolation> constraintViolations = dsv.validate();
1✔
110
        if (!constraintViolations.isEmpty()) {
1✔
111
            if (lenient) {
×
112
                // populate invalid fields with N/A
113
                constraintViolations.stream()
×
114
                    .filter(cv -> cv.getRootBean() instanceof DatasetField)
×
115
                    .map(cv -> ((DatasetField) cv.getRootBean()))
×
116
                    .forEach(f -> f.setSingleValue(DatasetField.NA_VALUE));
×
117

118
            } else {
119
                // explode with a helpful message
120
                String validationMessage = constraintViolations.stream()
×
121
                    .map(cv -> cv.getMessage() + " (Invalid value:" + cv.getInvalidValue() + ")")
×
122
                    .collect(joining(", ", "Validation Failed: ", "."));
×
123
                
124
                validationMessage  += constraintViolations.stream()
×
125
                    .filter(cv -> cv.getRootBean() instanceof TermsOfUseAndAccess)
×
126
                    .map(cv -> cv.toString());
×
127
                
128
                for (ConstraintViolation cv : constraintViolations){
×
129
                    if (cv.getRootBean() instanceof TermsOfUseAndAccess){
×
130
                        throw new IllegalCommandException(validationMessage,  this);
×
131
                    }
132
                }
×
133

134
                throw new IllegalCommandException(validationMessage, this);
×
135
            }
136
        }
137
    }
1✔
138

139

140

141
    /**
142
     * Whether it's EZID or DataCite, if the registration is refused because the
143
     * identifier already exists, we'll generate another one and try to register
144
     * again... but only up to some reasonably high number of times - so that we
145
     * don't go into an infinite loop here, if EZID is giving us these duplicate
146
     * messages in error.
147
     *
148
     * (and we do want the limit to be a "reasonably high" number! true, if our
149
     * identifiers are randomly generated strings, then it is highly unlikely
150
     * that we'll ever run into a duplicate race condition repeatedly; but if
151
     * they are sequential numeric values, than it is entirely possible that a
152
     * large enough number of values will be legitimately registered by another
153
     * entity sharing the same authority...)
154
     *
155
     * @param theDataset
156
     * @param ctxt
157
     * @throws CommandException
158
     */
159
    protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctxt, boolean retry) throws CommandException {
160
        if (!theDataset.isIdentifierRegistered()) {
×
161
            PidProvider pidProvider = PidUtil.getPidProvider(theDataset.getGlobalId().getProviderId());
×
162
            if ( pidProvider != null ) {
×
163
                try {
164
                    if (pidProvider.alreadyRegistered(theDataset)) {
×
165
                        int attempts = 0;
×
166
                        if(retry) {
×
167
                            do  {
168
                                pidProvider.generatePid(theDataset);
×
169
                                logger.log(Level.INFO, "Attempting to register external identifier for dataset {0} (trying: {1}).",
×
170
                                    new Object[]{theDataset.getId(), theDataset.getIdentifier()});
×
171
                                attempts++;
×
172
                            } while (pidProvider.alreadyRegistered(theDataset) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT);
×
173
                        }
174
                        if(!retry) {
×
NEW
175
                            logger.warning("Reserving PID for: "  + getDataset().getId() + " failed.");
×
NEW
176
                            throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidNotReserved", Arrays.asList(theDataset.getIdentifier())), this);
×
177
                        }
178
                        if(attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) {
×
179
                            //Didn't work - we existed the loop with too many tries
NEW
180
                            throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidReservationRetryExceeded", Arrays.asList(Integer.toString(attempts), theDataset.getIdentifier())), this);
×
181
                        }
182
                    }
183
                    // Invariant: Dataset identifier does not exist in the remote registry
184
                    try {
185
                        pidProvider.createIdentifier(theDataset);
×
186
                        theDataset.setGlobalIdCreateTime(getTimestamp());
×
187
                        theDataset.setIdentifierRegistered(true);
×
188
                    } catch (Throwable ex) {
×
189
                        logger.info("Call to globalIdServiceBean.createIdentifier failed: " + ex);
×
190
                    }
×
191

192
                } catch (Throwable e) {
×
NEW
193
                    if (e instanceof CommandException) {
×
NEW
194
                        throw (CommandException) e;
×
195
                    }
196
                    throw new CommandException(BundleUtil.getStringFromBundle("dataset.publish.error", pidProvider.getProviderInformation()), this);
×
197
                }
×
198
            } else {
199
                throw new IllegalCommandException("This dataset may not be published because its id registry service is not supported.", this);
×
200
            }
201

202
        }
203
    }
×
204

205
    protected Dataset getDataset() {
206
        return dataset;
1✔
207
    }
208

209
    public void setDataset(Dataset dataset) {
210
        this.dataset = dataset;
×
211
    }
×
212

213
    /**
214
     * The time the command instance was created. Note: This is not the time the
215
     * command was submitted to the engine. If the difference can be large
216
     * enough, consider using another timestamping mechanism. This is a
217
     * convenience method fit for most cases.
218
     *
219
     * @return the time {@code this} command was created.
220
     */
221
    protected Timestamp getTimestamp() {
222
        return timestamp;
1✔
223
    }
224

225
    protected void registerFilePidsIfNeeded(Dataset theDataset, CommandContext ctxt, boolean b) throws CommandException {
226
        // Register file PIDs if needed
NEW
227
        PidProvider pidGenerator = ctxt.dvObjects().getEffectivePidGenerator(getDataset());
×
NEW
228
        boolean shouldRegister = !pidGenerator.registerWhenPublished() &&
×
NEW
229
                ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()) &&
×
NEW
230
                pidGenerator.canCreatePidsLike(getDataset().getGlobalId());
×
NEW
231
        if (shouldRegister) {
×
NEW
232
            for (DataFile dataFile : theDataset.getFiles()) {
×
NEW
233
                logger.fine(dataFile.getId() + " is registered?: " + dataFile.isIdentifierRegistered());
×
NEW
234
                if (!dataFile.isIdentifierRegistered()) {
×
235
                    // pre-register a persistent id
NEW
236
                    registerFileExternalIdentifier(dataFile, pidGenerator, ctxt, true);
×
237
                }
NEW
238
            }
×
239
        }
NEW
240
    }
×
241

242
    private void registerFileExternalIdentifier(DataFile dataFile, PidProvider pidProvider, CommandContext ctxt, boolean retry) throws CommandException {
243

NEW
244
        if (!dataFile.isIdentifierRegistered()) {
×
245

NEW
246
            if (pidProvider instanceof FakeDOIProvider) {
×
NEW
247
                retry = false; // No reason to allow a retry with the FakeProvider (even if it allows
×
248
                               // pre-registration someday), so set false for efficiency
249
            }
250
            try {
NEW
251
                if (pidProvider.alreadyRegistered(dataFile)) {
×
NEW
252
                    int attempts = 0;
×
NEW
253
                    if (retry) {
×
254
                        do {
NEW
255
                            pidProvider.generatePid(dataFile);
×
NEW
256
                            logger.log(Level.INFO, "Attempting to register external identifier for datafile {0} (trying: {1}).",
×
NEW
257
                                    new Object[] { dataFile.getId(), dataFile.getIdentifier() });
×
NEW
258
                            attempts++;
×
NEW
259
                        } while (pidProvider.alreadyRegistered(dataFile) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT);
×
260
                    }
NEW
261
                    if (!retry) {
×
NEW
262
                        logger.warning("Reserving File PID for: " + getDataset().getId() + ", fileId: " + dataFile.getId() + ", during publication failed.");
×
NEW
263
                        throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.filePidNotReserved", Arrays.asList(getDataset().getIdentifier())), this);
×
264
                    }
NEW
265
                    if (attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) {
×
266
                        // Didn't work - we existed the loop with too many tries
NEW
267
                        throw new CommandExecutionException("This dataset may not be published because its identifier is already in use by another dataset; "
×
NEW
268
                                + "gave up after " + attempts + " attempts. Current (last requested) identifier: " + dataFile.getIdentifier(), this);
×
269
                    }
270
                }
271
                // Invariant: DataFile identifier does not exist in the remote registry
272
                try {
NEW
273
                    pidProvider.createIdentifier(dataFile);
×
NEW
274
                    dataFile.setGlobalIdCreateTime(getTimestamp());
×
NEW
275
                    dataFile.setIdentifierRegistered(true);
×
NEW
276
                } catch (Throwable ex) {
×
NEW
277
                    logger.info("Call to globalIdServiceBean.createIdentifier failed: " + ex);
×
NEW
278
                }
×
279

NEW
280
            } catch (Throwable e) {
×
NEW
281
                if (e instanceof CommandException) {
×
NEW
282
                    throw (CommandException) e;
×
283
                }
NEW
284
                throw new CommandException(BundleUtil.getStringFromBundle("file.register.error", pidProvider.getProviderInformation()), this);
×
NEW
285
            }
×
286
        } else {
NEW
287
            throw new IllegalCommandException("This datafile may not have a PID because its id registry service is not supported.", this);
×
288
        }
289

NEW
290
    }
×
291

292
    protected void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, DatasetVersion persistedVersion) throws IllegalCommandException {
293
        Set<MetadataBlock> changedMDBs = DatasetVersionDifference.getBlocksWithChanges(newVersion, persistedVersion);
1✔
294
        for (MetadataBlock mdb : changedMDBs) {
1✔
295
            logger.fine(mdb.getName() + " has been changed");
1✔
296
            String smdbString = JvmSettings.MDB_SYSTEM_KEY_FOR.lookupOptional(mdb.getName())
1✔
297
                    .orElse(null);
1✔
298
            if (smdbString != null) {
1✔
299
                logger.fine("Found key: " + smdbString);
×
300
                String mdKey = getRequest().getSystemMetadataBlockKeyFor(mdb.getName());
×
301
                logger.fine("Found supplied key: " + mdKey);
×
302
                if (mdKey == null || !mdKey.equalsIgnoreCase(smdbString)) {
×
303
                    throw new IllegalCommandException("Updating system metadata in block " + mdb.getName() + " requires a valid key", this);
×
304
                }
305
            }
306
        }
1✔
307
    }
1✔
308

309
    protected void registerExternalVocabValuesIfAny(CommandContext ctxt, DatasetVersion newVersion) {
310
        for (DatasetField df : newVersion.getFlatDatasetFields()) {
1✔
311
            logger.fine("Found id: " + df.getDatasetFieldType().getId());
×
312
            if (ctxt.dsField().getCVocConf(true).containsKey(df.getDatasetFieldType().getId())) {
×
313
                ctxt.dsField().registerExternalVocabValues(df);
×
314
            }
315
        }
×
316
    }
1✔
317
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc