• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #23580

25 Oct 2024 08:18PM CUT coverage: 21.187% (+0.3%) from 20.87%
#23580

Pull #10790

github

web-flow
Merge ae058d107 into 5f5126a87
Pull Request #10790: fix: issues in exporters and citations for PermaLink/non-DOI PIDs

48 of 69 new or added lines in 7 files covered. (69.57%)

1410 existing lines in 14 files now uncovered.

18260 of 86183 relevant lines covered (21.19%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

54.58
/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java
1
package edu.harvard.iq.dataverse.engine.command.impl;
2

3
import edu.harvard.iq.dataverse.DataFile;
4
import edu.harvard.iq.dataverse.DatasetVersion;
5
import edu.harvard.iq.dataverse.Dataverse;
6
import edu.harvard.iq.dataverse.authorization.Permission;
7
import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException;
8
import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker;
9
import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
10
import edu.harvard.iq.dataverse.engine.command.CommandContext;
11
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
12
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
13
import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException;
14
import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper;
15
import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit;
16
import edu.harvard.iq.dataverse.util.BundleUtil;
17
import edu.harvard.iq.dataverse.util.FileUtil;
18
import edu.harvard.iq.dataverse.util.ShapefileHandler;
19
import edu.harvard.iq.dataverse.util.StringUtil;
20
import edu.harvard.iq.dataverse.util.file.BagItFileHandler;
21
import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory;
22
import edu.harvard.iq.dataverse.util.file.CreateDataFileResult;
23
import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException;
24
import jakarta.enterprise.inject.spi.CDI;
25
import org.apache.commons.io.FileUtils;
26
import org.apache.commons.lang3.StringUtils;
27

28
import java.io.File;
29
import java.io.FileInputStream;
30
import java.io.IOException;
31
import java.io.InputStream;
32
import java.nio.charset.Charset;
33
import java.nio.file.Files;
34
import java.nio.file.Path;
35
import java.nio.file.Paths;
36
import java.nio.file.StandardCopyOption;
37
import java.text.MessageFormat;
38
import java.util.ArrayList;
39
import java.util.Arrays;
40
import java.util.Collections;
41
import java.util.HashMap;
42
import java.util.HashSet;
43
import java.util.List;
44
import java.util.Map;
45
import java.util.Optional;
46
import java.util.Set;
47
import java.util.logging.Logger;
48
import java.util.zip.GZIPInputStream;
49
import java.util.zip.ZipEntry;
50
import java.util.zip.ZipFile;
51

52
import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable;
53
import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT;
54
import static edu.harvard.iq.dataverse.util.FileUtil.createIngestFailureReport;
55
import static edu.harvard.iq.dataverse.util.FileUtil.determineFileType;
56
import static edu.harvard.iq.dataverse.util.FileUtil.determineFileTypeByNameAndExtension;
57
import static edu.harvard.iq.dataverse.util.FileUtil.getFilesTempDirectory;
58
import static edu.harvard.iq.dataverse.util.FileUtil.saveInputStreamInTempFile;
59
import static edu.harvard.iq.dataverse.util.FileUtil.useRecognizedType;
60

61
/**
62
 *
63
 * @author landreev
64
 */
65
// Note the commented out @RequiredPermissions. We need to use dynamic 
66
// permissions instead, to accommodate both adding files to an existing 
67
// dataset and files being uploaded in the context of creating a new dataset
68
// via the Add Dataset page. 
69
//@RequiredPermissions( Permission.EditDataset )
70
public class CreateNewDataFilesCommand extends AbstractCommand<CreateDataFileResult> {
71
    private static final Logger logger = Logger.getLogger(CreateNewDataFilesCommand.class.getCanonicalName());
1✔
72
    
73
    private final DatasetVersion version;
74
    private final InputStream inputStream;
75
    private final String fileName;
76
    private final String suppliedContentType; 
77
    private final UploadSessionQuotaLimit quota;
78
    // parent Dataverse must be specified when the command is called on Create 
79
    // of a new dataset that does not exist in the database yet (for the purposes
80
    // of authorization - see getRequiredPermissions() below):
81
    private final Dataverse parentDataverse;
82
    // With Direct Upload the following values already exist and are passed to the command:
83
    private final String newStorageIdentifier; 
84
    private final String newCheckSum; 
85
    private DataFile.ChecksumType newCheckSumType;
86
    private final Long newFileSize;
87

88
    /**
     * Convenience constructor for callers that do not supply a checksum type.
     * Delegates to the overload that accepts a {@link DataFile.ChecksumType},
     * passing {@code null} for it; {@code execute()} replaces a {@code null}
     * type with the checksum algorithm configured in the system config.
     */
    public CreateNewDataFilesCommand(DataverseRequest aRequest, DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, UploadSessionQuotaLimit quota, String newCheckSum) {
89
        this(aRequest, version, inputStream, fileName, suppliedContentType, newStorageIdentifier, quota, newCheckSum, null);
1✔
90
    }
1✔
91
    
92
    /**
     * Convenience constructor for callers that supply a checksum type but
     * neither a pre-computed file size nor a parent Dataverse. Delegates to
     * the full constructor, passing {@code null} for both {@code newFileSize}
     * and {@code dataverse}.
     */
    public CreateNewDataFilesCommand(DataverseRequest aRequest, DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, UploadSessionQuotaLimit quota, String newCheckSum, DataFile.ChecksumType newCheckSumType) {
93
        this(aRequest, version, inputStream, fileName, suppliedContentType, newStorageIdentifier, quota, newCheckSum, newCheckSumType, null, null);
1✔
94
    }
1✔
95
    
96
    // This version of the command must be used when files are created in the 
97
    // context of creating a brand new dataset (from the Add Dataset page):
98
    
99
    /**
     * Full constructor; the other overloads delegate here. Passes the request
     * and the Dataverse to the superclass and stores every other argument in
     * the corresponding field without validation.
     *
     * @param aRequest             the request on whose behalf the command runs
     * @param version              the dataset version the new files are created for
     * @param inputStream          the uploaded content; copied to a temp file by
     *                             {@code execute()} when {@code newStorageIdentifier} is {@code null}
     * @param fileName             the name of the uploaded file
     * @param suppliedContentType  the MIME type supplied with the upload; may be
     *                             {@code null} or empty
     * @param newStorageIdentifier storage identifier that already exists when
     *                             Direct Upload is used; {@code null} otherwise
     * @param quota                upload session quota; may be {@code null}
     * @param newCheckSum          checksum that already exists when Direct Upload is used
     * @param newCheckSumType      type of {@code newCheckSum}; when {@code null},
     *                             {@code execute()} falls back to the configured default
     * @param newFileSize          file size that already exists when Direct Upload is used;
     *                             may be {@code null}
     * @param dataverse            parent Dataverse; must be specified when the dataset
     *                             does not exist in the database yet (it is used for
     *                             authorization), may be {@code null} otherwise
     */
    public CreateNewDataFilesCommand(DataverseRequest aRequest, DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, UploadSessionQuotaLimit quota, String newCheckSum, DataFile.ChecksumType newCheckSumType, Long newFileSize, Dataverse dataverse) {
100
        super(aRequest, dataverse);
1✔
101
        
102
        this.version = version;
1✔
103
        this.inputStream = inputStream;
1✔
104
        this.fileName = fileName;
1✔
105
        this.suppliedContentType = suppliedContentType; 
1✔
106
        this.newStorageIdentifier = newStorageIdentifier; 
1✔
107
        this.newCheckSum = newCheckSum; 
1✔
108
        this.newCheckSumType = newCheckSumType;
1✔
109
        this.parentDataverse = dataverse;
1✔
110
        this.quota = quota;
1✔
111
        this.newFileSize = newFileSize;
1✔
112
    }
1✔
113
    
114

115
    @Override
116
    public CreateDataFileResult execute(CommandContext ctxt) throws CommandException {
117
        List<DataFile> datafiles = new ArrayList<>();
1✔
118

119
        //When there is no checksum/checksumtype being sent (normal upload, needs to be calculated), set the type to the current default
120
        if(newCheckSumType == null) {
1✔
121
            newCheckSumType = ctxt.systemConfig().getFileFixityChecksumAlgorithm();
1✔
122
        }
123

124
        String warningMessage = null;
1✔
125

126
        // save the file, in the temporary location for now: 
127
        Path tempFile = null;
1✔
128

129
        Long fileSizeLimit = ctxt.systemConfig().getMaxFileUploadSizeForStore(version.getDataset().getEffectiveStorageDriverId());
1✔
130
        Long storageQuotaLimit = null; 
1✔
131
        
132
        if (ctxt.systemConfig().isStorageQuotasEnforced()) {
1✔
133
            if (quota != null) {
1✔
134
                storageQuotaLimit = quota.getRemainingQuotaInBytes();
1✔
135
            }
136
        }
137
        String finalType = null;
1✔
138
        File newFile = null;    // this File will be used for a single-file, local (non-direct) upload
1✔
139
        long fileSize = -1; 
1✔
140

141

142
        if (newStorageIdentifier == null) {
1✔
143
            var filesTempDirectory = getFilesTempDirectory();
1✔
144
            if (filesTempDirectory != null) {
1✔
145
                try {
146
                    tempFile = Files.createTempFile(Paths.get(filesTempDirectory), "tmp", "upload");
1✔
147
                    // "temporary" location is the key here; this is why we are not using
148
                    // the DataStore framework for this - the assumption is that
149
                    // temp files will always be stored on the local filesystem.
150
                    // -- L.A. Jul. 2014
151
                    logger.fine("Will attempt to save the file as: " + tempFile.toString());
1✔
152
                    Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
1✔
153
                } catch (IOException ioex) {
1✔
154
                    throw new CommandExecutionException("Failed to save the upload as a temp file (temp disk space?)", ioex, this);
1✔
155
                }
1✔
156

157
                // A file size check, before we do anything else:
158
                // (note that "no size limit set" = "unlimited")
159
                // (also note, that if this is a zip file, we'll be checking
160
                // the size limit for each of the individual unpacked files)
161
                fileSize = tempFile.toFile().length();
1✔
162
                if (fileSizeLimit != null && fileSize > fileSizeLimit) {
1✔
163
                    try {
164
                        tempFile.toFile().delete();
1✔
UNCOV
165
                    } catch (Exception ex) {
×
166
                        // ignore - but log a warning
167
                        logger.warning("Could not remove temp file " + tempFile.getFileName());
×
168
                    }
1✔
169
                    throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit)), this);
1✔
170
                }
171

172
            } else {
UNCOV
173
                throw new CommandExecutionException("Temp directory is not configured.", this);
×
174
            }
175
            
176
            logger.fine("mime type supplied: " + suppliedContentType);
1✔
177
            
178
            // Let's try our own utilities (Jhove, etc.) to determine the file type
179
            // of the uploaded file. (We may already have a mime type supplied for this
180
            // file - maybe the type that the browser recognized on upload; or, if
181
            // it's a harvest, maybe the remote server has already given us the type
182
            // for this file... with our own type utility we may or may not do better
183
            // than the type supplied:
184
            // -- L.A.
185
            String recognizedType = null;
1✔
186

187
            try {
188
                recognizedType = determineFileType(tempFile.toFile(), fileName);
1✔
189
                logger.fine("File utility recognized the file as " + recognizedType);
1✔
190
                if (recognizedType != null && !recognizedType.equals("")) {
1✔
191
                    if (useRecognizedType(suppliedContentType, recognizedType)) {
1✔
192
                        finalType = recognizedType;
1✔
193
                    }
194
                }
195

196
            } catch (Exception ex) {
×
197
                logger.warning("Failed to run the file utility mime type check on file " + fileName);
×
198
            }
1✔
199

200
            if (finalType == null) {
1✔
201
                finalType = (suppliedContentType == null || suppliedContentType.equals(""))
×
202
                        ? MIME_TYPE_UNDETERMINED_DEFAULT
×
UNCOV
203
                        : suppliedContentType;
×
204
            }
205

206
            // A few special cases:
207
            // if this is a gzipped FITS file, we'll uncompress it, and ingest it as
208
            // a regular FITS file:
209
            if (finalType.equals("application/fits-gzipped")) {
1✔
210

211
                InputStream uncompressedIn = null;
×
UNCOV
212
                String finalFileName = fileName;
×
213
                // if the file name had the ".gz" extension, remove it,
214
                // since we are going to uncompress it:
215
                if (fileName != null && fileName.matches(".*\\.gz$")) {
×
UNCOV
216
                    finalFileName = fileName.replaceAll("\\.gz$", "");
×
217
                }
218

219
                DataFile datafile = null;
×
UNCOV
220
                long uncompressedFileSize = -1; 
×
221
                try {
222
                    uncompressedIn = new GZIPInputStream(new FileInputStream(tempFile.toFile()));
×
223
                    File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit, storageQuotaLimit);
×
224
                    uncompressedFileSize = unZippedTempFile.length();
×
225
                    datafile = FileUtil.createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, ctxt.systemConfig().getFileFixityChecksumAlgorithm());
×
UNCOV
226
                } catch (IOException | FileExceedsMaxSizeException | FileExceedsStorageQuotaException ioex) {
×
227
                    // it looks like we simply skip the file silently, if its uncompressed size
228
                    // exceeds the limit. we should probably report this in detail instead.
UNCOV
229
                    datafile = null;
×
230
                } finally {
UNCOV
231
                    if (uncompressedIn != null) {
×
232
                        try {
233
                            uncompressedIn.close();
×
234
                        } catch (IOException e) {
×
UNCOV
235
                        }
×
236
                    }
237
                }
238

239
                // If we were able to produce an uncompressed file, we'll use it
240
                // to create and return a final DataFile; if not, we're not going
241
                // to do anything - and then a new DataFile will be created further
242
                // down, from the original, still-compressed file.
UNCOV
243
                if (datafile != null) {
×
244
                    // remove the compressed temp file:
245
                    try {
246
                        tempFile.toFile().delete();
×
UNCOV
247
                    } catch (SecurityException ex) {
×
248
                        // (this is very non-fatal)
249
                        logger.warning("Failed to delete temporary file " + tempFile.toString());
×
UNCOV
250
                    }
×
251

UNCOV
252
                    datafiles.add(datafile);
×
253
                    // Update quota if present
254
                    if (quota != null) {
×
UNCOV
255
                        quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + uncompressedFileSize);
×
256
                    }
UNCOV
257
                    return CreateDataFileResult.success(fileName, finalType, datafiles);
×
258
                }
259

260
                // If it's a ZIP file, we are going to unpack it and create multiple
261
                // DataFile objects from its contents:
262
            } else if (finalType.equals("application/zip")) {
1✔
263

264
                int fileNumberLimit = ctxt.systemConfig().getZipUploadFilesLimit();
1✔
265
                Long combinedUnzippedFileSize = 0L;
1✔
266

267
                try {
268
                    Charset charset = null;
1✔
269
                    /*
270
                        TODO: (?)
271
                        We may want to investigate somehow letting the user specify
272
                        the charset for the filenames in the zip file...
273
                    - otherwise, ZipInputStream bails out if it encounters a file
274
                        name that's not valid in the current charset (i.e., UTF-8, in
275
                    our case). It would be a bit trickier than what we're doing for
276
                    SPSS tabular ingests - with the lang. encoding pulldown menu -
277
                        because this encoding needs to be specified *before* we upload and
278
                    attempt to unzip the file.
279
                                -- L.A. 4.0 beta12
280
                        logger.info("default charset is "+Charset.defaultCharset().name());
281
                        if (Charset.isSupported("US-ASCII")) {
282
                            logger.info("charset US-ASCII is supported.");
283
                            charset = Charset.forName("US-ASCII");
284
                            if (charset != null) {
285
                                   logger.info("was able to obtain charset for US-ASCII");
286
                            }
287

288
                         }
289
                     */
290

291
                    /**
292
                     * Perform a quick check for how many individual files are
293
                     * inside this zip archive. If it's above the limit, we can
294
                     * give up right away, without doing any unpacking.
295
                     * This should be a fairly inexpensive operation, we just need
296
                     * to read the directory at the end of the file.
297
                     */
298

299

300
                    /**
301
                     * The ZipFile constructors in openZipFile will throw ZipException -
302
                     * a type of IOException - if there's something wrong 
303
                     * with this file as a zip. There's no need to intercept it
304
                     * here, it will be caught further below, with other IOExceptions,
305
                     * at which point we'll give up on trying to unpack it and
306
                     * then attempt to save it as is.
307
                     */
308

309
                    int numberOfUnpackableFiles = 0;
1✔
310

311
                    /**
312
                     * Note that we can't just use zipFile.size(),
313
                     * unfortunately, since that's the total number of entries,
314
                     * some of which can be directories. So we need to go
315
                     * through all the individual zipEntries and count the ones
316
                     * that are files.
317
                     */
318

319
                    try (var zipFile = openZipFile(tempFile, charset)) {
1✔
320
                        var zipEntries = filteredZipEntries(zipFile);
1✔
321
                        for (var entry : zipEntries) {
1✔
322
                            logger.fine("inside first zip pass; this entry: " + entry.getName());
1✔
323
                            numberOfUnpackableFiles++;
1✔
324
                            if (numberOfUnpackableFiles > fileNumberLimit) {
1✔
UNCOV
325
                                logger.warning("Zip upload - too many files in the zip to process individually.");
×
326
                                warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit
×
327
                                                 + "); please upload a zip archive with fewer files, if you want them to be ingested "
328
                                                 + "as individual DataFiles.";
329
                                throw new IOException();
×
330
                            }
331
                            // In addition to counting the files, we can
332
                            // also check the file size while we're here,
333
                            // provided the size limit is defined; if a single
334
                            // file is above the individual size limit, unzipped,
335
                            // we give up on unpacking this zip archive as well:
336
                            if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) {
1✔
337
                                throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), bytesToHumanReadable(fileSizeLimit)));
×
338
                            }
339
                            // Similarly, we want to check if saving all these unpacked
340
                            // files is going to push the disk usage over the
341
                            // quota:
342
                            if (storageQuotaLimit != null) {
1✔
UNCOV
343
                                combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize();
×
UNCOV
344
                                if (combinedUnzippedFileSize > storageQuotaLimit) {
×
345
                                    //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit)));
346
                                    // change of plans: if the unzipped content inside exceeds the remaining quota,
347
                                    // we reject the upload outright, rather than accepting the zip
348
                                    // file as is.
349
                                    throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this);
×
350
                                }
351
                            }
352
                        }
1✔
353
                        // OK we're still here - that means we can proceed unzipping.
354

355
                        // reset:
356
                        combinedUnzippedFileSize = 0L;
1✔
357

358
                        for (var entry : zipEntries) {
1✔
359
                            if (datafiles.size() > fileNumberLimit) {
1✔
UNCOV
360
                                logger.warning("Zip upload - too many files.");
×
361
                                warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit
×
362
                                        + "); please upload a zip archive with fewer files, if you want them to be ingested "
363
                                        + "as individual DataFiles.";
UNCOV
364
                                throw new IOException();
×
365
                            }
366
                            var fileEntryName = entry.getName();
1✔
367
                            var shortName = getShortName(fileEntryName);
1✔
368
                            logger.fine("ZipEntry, file: " + fileEntryName);
1✔
369
                            String storageIdentifier = FileUtil.generateStorageIdentifier();
1✔
370
                            File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier);
1✔
371
                            Files.copy(zipFile.getInputStream(entry), unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
1✔
372
                            // No need to check the size of this unpacked file against the size limit,
373
                            // since we've already checked for that in the first pass.
374
                            DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName,
1✔
375
                                MIME_TYPE_UNDETERMINED_DEFAULT,
376
                                ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false);
1✔
377

378
                            if (!fileEntryName.equals(shortName)) {
1✔
379
                                // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes),
380
                                // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all
381
                                // the leading, trailing and duplicate slashes; then replace all the characters that
382
                                // don't pass our validation rules.
383
                                String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", "");
1✔
384
                                directoryName = StringUtil.sanitizeFileDirectory(directoryName, true);
1✔
385
                                // if (!"".equals(directoryName)) {
386
                                if (!StringUtil.isEmpty(directoryName)) {
1✔
387
                                    logger.fine("setting the directory label to " + directoryName);
1✔
388
                                    datafile.getFileMetadata().setDirectoryLabel(directoryName);
1✔
389
                                }
390
                            }
391

392
                            if (datafile != null) {
1✔
393
                                // We have created this datafile with the mime type "unknown";
394
                                // Now that we have it saved in a temporary location,
395
                                // let's try and determine its real type:
396

397
                                String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier();
1✔
398

399
                                try {
400
                                    recognizedType = determineFileType(unzippedFile, shortName);
1✔
401
                                    // null the File explicitly, to release any open FDs:
402
                                    unzippedFile = null;
1✔
403
                                    logger.fine("File utility recognized unzipped file as " + recognizedType);
1✔
404
                                    if (recognizedType != null && !recognizedType.equals("")) {
1✔
405
                                        datafile.setContentType(recognizedType);
1✔
406
                                    }
UNCOV
407
                                } catch (Exception ex) {
×
408
                                    logger.warning("Failed to run the file utility mime type check on file " + fileName);
×
409
                                }
1✔
410

411
                                datafiles.add(datafile);
1✔
412
                                combinedUnzippedFileSize += datafile.getFilesize();
1✔
413
                            }
414
                        }
1✔
415
                    }
416

UNCOV
417
                } catch (IOException ioex) {
×
418
                    // just clear the datafiles list and let
419
                    // ingest default to creating a single DataFile out
420
                    // of the original zip file, without unpacking it.
421
                    logger.warning("Unzipping failed; rolling back to saving the file as is.");
×
UNCOV
422
                    if (warningMessage == null) {
×
UNCOV
423
                        warningMessage = BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed");
×
424
                    }
425

426
                    datafiles.clear();
×
427
                } catch (FileExceedsMaxSizeException femsx) {
×
UNCOV
428
                    logger.warning("One of the unzipped files exceeds the size limit; resorting to saving the file as is. " + femsx.getMessage());
×
UNCOV
429
                    warningMessage =  BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed.size", Arrays.asList(FileSizeChecker.bytesToHumanReadable(fileSizeLimit)));
×
UNCOV
430
                    datafiles.clear();
×
431
                } /*catch (FileExceedsStorageQuotaException fesqx) {
1✔
432
                    //logger.warning("One of the unzipped files exceeds the storage quota limit; resorting to saving the file as is. " + fesqx.getMessage());
433
                    //warningMessage =  BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed.quota", Arrays.asList(FileSizeChecker.bytesToHumanReadable(storageQuotaLimit)));
434
                    //datafiles.clear();
435
                    throw new CommandExecutionException(fesqx.getMessage(), fesqx, this);
436
                }*/
437
                if (!datafiles.isEmpty()) {
1✔
438
                    // remove the uploaded zip file:
439
                    try {
440
                        Files.delete(tempFile);
1✔
441
                    } catch (IOException ioex) {
×
442
                        // do nothing - it's just a temp file.
443
                        logger.warning("Could not remove temp file " + tempFile.getFileName().toString());
×
444
                    }
1✔
445
                    // update the quota object: 
446
                    if (quota != null) {
1✔
447
                        quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + combinedUnzippedFileSize);
1✔
448
                    }
449
                    // and return:
450
                    return CreateDataFileResult.success(fileName, finalType, datafiles);
1✔
451
                }
452

453
            } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) {
1✔
454
                // Shape files may have to be split into multiple files,
455
                // one zip archive per each complete set of shape files:
456

457
                // File rezipFolder = new File(this.getFilesTempDirectory());
458
                File rezipFolder = FileUtil.getShapefileUnzipTempDirectory();
1✔
459

460
                IngestServiceShapefileHelper shpIngestHelper;
461
                shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder);
1✔
462

463
                boolean didProcessWork = shpIngestHelper.processFile();
1✔
464
                if (!(didProcessWork)) {
1✔
465
                    logger.severe("Processing of zipped shapefile failed.");
×
466
                    return CreateDataFileResult.error(fileName, finalType);
×
467
                }
468
                long combinedRezippedFileSize = 0L;
1✔
469

470
                try {
471
                    
472
                    for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) {
1✔
473
                        FileInputStream finalFileInputStream = new FileInputStream(finalFile);
1✔
474
                        finalType = FileUtil.determineContentType(finalFile);
1✔
475
                        if (finalType == null) {
1✔
UNCOV
476
                            logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'");
×
UNCOV
477
                            continue;
×
478
                        }
479

480
                        File unZippedShapeTempFile = saveInputStreamInTempFile(finalFileInputStream, fileSizeLimit, storageQuotaLimit != null ? storageQuotaLimit - combinedRezippedFileSize : null);
1✔
481
                        DataFile new_datafile = FileUtil.createSingleDataFile(version, unZippedShapeTempFile, finalFile.getName(), finalType, ctxt.systemConfig().getFileFixityChecksumAlgorithm());
1✔
482
                        
483
                        String directoryName = null;
1✔
484
                        String absolutePathName = finalFile.getParent();
1✔
485
                        if (absolutePathName != null) {
1✔
486
                            if (absolutePathName.length() > rezipFolder.toString().length()) {
1✔
487
                                // This file lives in a subfolder - we want to 
488
                                // preserve it in the FileMetadata:
489
                                directoryName = absolutePathName.substring(rezipFolder.toString().length() + 1);
1✔
490

491
                                if (!StringUtil.isEmpty(directoryName)) {
1✔
492
                                    new_datafile.getFileMetadata().setDirectoryLabel(directoryName);
1✔
493
                                }
494
                            }
495
                        }
496
                        if (new_datafile != null) {
1✔
497
                            datafiles.add(new_datafile);
1✔
498
                            combinedRezippedFileSize += unZippedShapeTempFile.length();
1✔
499
                            // todo: can this new_datafile be null?
500
                        } else {
501
                            logger.severe("Could not add part of rezipped shapefile. new_datafile was null: " + finalFile.getName());
×
502
                        }
503
                        try {
504
                            finalFileInputStream.close();
1✔
505
                        } catch (IOException ioex) {
×
506
                            // this one can be ignored
507
                        }
1✔
508
                    }
1✔
509
                } catch (FileExceedsMaxSizeException | FileExceedsStorageQuotaException femsx) {
×
UNCOV
510
                    logger.severe("One of the unzipped shape files exceeded the size limit, or the storage quota; giving up. " + femsx.getMessage());
×
UNCOV
511
                    datafiles.clear();
×
512
                    // (or should we throw an exception, instead of skipping it quietly?)
513
                } catch (IOException ioex) {
×
UNCOV
514
                    throw new CommandExecutionException("Failed to process one of the components of the unpacked shape file", ioex, this);
×
515
                    // todo? - maybe try to provide a more detailed explanation, of which repackaged component, etc.?
516
                }
1✔
517

518
                // Delete the temp directory used for unzipping
519
                // The try-catch is due to an error encountered when using NFS for storing files,
520
                // cf. https://github.com/IQSS/dataverse/issues/5909
521
                try {
522
                    if (rezipFolder!=null)
1✔
523
                        FileUtils.deleteDirectory(rezipFolder);
1✔
UNCOV
524
                } catch (IOException ioex) {
×
525
                    // do nothing - it's a temp folder.
UNCOV
526
                    logger.warning("Could not remove temp folder, error message : " + ioex.getMessage());
×
527
                }
1✔
528

529
                if (!datafiles.isEmpty()) {
1✔
530
                    // remove the uploaded zip file:
531
                    try {
532
                        Files.delete(tempFile);
1✔
533
                    } catch (IOException ioex) {
×
534
                        // ignore - it's just a temp file - but let's log a warning
535
                        logger.warning("Could not remove temp file " + tempFile.getFileName().toString());
×
536
                    } catch (SecurityException se) {
×
537
                        // same
538
                        logger.warning("Unable to delete: " + tempFile.toString() + "due to Security Exception: "
×
UNCOV
539
                                + se.getMessage());
×
540
                    }
1✔
541
                    // update the quota object: 
542
                    if (quota != null) {
1✔
543
                        quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + combinedRezippedFileSize);
1✔
544
                    }
545
                    return CreateDataFileResult.success(fileName, finalType, datafiles);
1✔
546
                } else {
547
                    logger.severe("No files added from directory of rezipped shapefiles");
×
548
                }
549
                return CreateDataFileResult.error(fileName, finalType);
×
550

UNCOV
551
            } else if (finalType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE)) {
×
552
                
553
                try { 
UNCOV
554
                    Optional<BagItFileHandler> bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler();
×
555
                    if (bagItFileHandler.isPresent()) {
×
556
                        CreateDataFileResult result = bagItFileHandler.get().handleBagItPackage(ctxt.systemConfig(), version, fileName, tempFile.toFile());
×
557
                        return result;
×
558
                    }
UNCOV
559
                } catch (IOException ioex) {
×
UNCOV
560
                    throw new CommandExecutionException("Failed to process uploaded BagIt file", ioex, this);
×
561
                }
×
562
            }
563
            
564
            // These are the final File and its size that will be used to 
565
            // create a single DataFile: 
566
            
UNCOV
567
            newFile = tempFile.toFile();
×
568
            fileSize = newFile.length();
×
569
            
570
        } else {
×
571
            // Direct upload.
572
            
573
            // Since this is a direct upload, and therefore no temp file associated 
574
            // with it, we may, OR MAY NOT know the size of the file. If this is 
575
            // a direct upload via the UI, the page must have already looked up 
576
            // the size, after the client confirmed that the upload had completed. 
577
            // (so that we can reject the upload here, i.e. before the user clicks
578
            // save, if it's over the size limit or storage quota). However, if 
579
            // this is a direct upload via the API, we will wait until the 
580
            // upload is finalized in the saveAndAddFiles method to enforce the 
581
            // limits. 
582
            if (newFileSize != null) {
×
583
                fileSize = newFileSize;
×
584
                
585
                // if the size is specified, and it's above the individual size 
586
                // limit for this store, we can reject it now:
UNCOV
587
                if (fileSizeLimit != null && fileSize > fileSizeLimit) {
×
588
                    throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit)), this);
×
589
                }
590
            }
591
            
592
            // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied
UNCOV
593
            finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType;
×
594
            String type = determineFileTypeByNameAndExtension(fileName);
×
595
            if (!StringUtils.isBlank(type)) {
×
596
                //Use rules for deciding when to trust browser supplied type
597
                if (useRecognizedType(finalType, type)) {
×
598
                    finalType = type;
×
599
                }
600
                logger.fine("Supplied type: " + suppliedContentType + ", finalType: " + finalType);
×
601
            }
602
            
603
            
604
        }
605
        
606
        // Finally, if none of the special cases above were applicable (or 
607
        // if we were unable to unpack an uploaded file, etc.), we'll just 
608
        // create and return a single DataFile:
609
        
610
        
611
        // We have already checked that this file does not exceed the individual size limit; 
612
        // but if we are processing it as is, as a single file, we need to check if 
613
        // its size does not go beyond the allocated storage quota (if specified):
614
        
UNCOV
615
        if (storageQuotaLimit != null && fileSize > storageQuotaLimit) {
×
616
            if (newFile != null) {
×
617
                // Remove the temp. file, if this is a non-direct upload. 
618
                // If this is a direct upload, it will be a responsibility of the 
619
                // component calling the command to remove the file that may have
620
                // already been saved in the S3 volume. 
621
                try {
622
                    newFile.delete();
×
UNCOV
623
                } catch (Exception ex) {
×
624
                    // ignore - but log a warning
625
                    logger.warning("Could not remove temp file " + tempFile.getFileName());
×
626
                }
×
627
            }
628
            throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(fileSize), bytesToHumanReadable(storageQuotaLimit)), this);
×
629
        } 
630
        
631
        DataFile datafile = FileUtil.createSingleDataFile(version, newFile, newStorageIdentifier, fileName, finalType, newCheckSumType, newCheckSum);
×
632

UNCOV
633
        if (datafile != null) {
×
634

UNCOV
635
            if (warningMessage != null) {
×
UNCOV
636
                createIngestFailureReport(datafile, warningMessage);
×
UNCOV
637
                datafile.SetIngestProblem();
×
638
            }
639
            if (datafile.getFilesize() < 0) {
×
UNCOV
640
                datafile.setFilesize(fileSize);
×
641
            }
UNCOV
642
            datafiles.add(datafile);
×
643

644
            // Update the quota definition for the *current upload session*
645
            // This is relevant for the uploads going through the UI page 
646
            // (where there may be an appreciable amount of time between the user
647
            // uploading the files and clicking "save". The file size should be 
648
            // available here for both direct and local uploads via the UI. 
649
            // It is not yet available if this is direct-via-API - but 
650
            // for API uploads the quota check will be enforced during the final 
651
            // save. 
UNCOV
652
            if (fileSize > 0 && quota != null) {
×
653
                logger.info("Setting total usage in bytes to " + (quota.getTotalUsageInBytes() + fileSize));
×
654
                quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + fileSize);
×
655
            }
656

UNCOV
657
            return CreateDataFileResult.success(fileName, finalType, datafiles);
×
658
        }
659

UNCOV
660
        return CreateDataFileResult.error(fileName, finalType);
×
661
    }   // end createDataFiles
662

663
    private static List<? extends ZipEntry> filteredZipEntries(ZipFile zipFile) {
664
        var entries = Collections.list(zipFile.entries()).stream().filter(e -> {
1✔
665
            var entryName = e.getName();
1✔
666
            logger.fine("ZipEntry, file: " + entryName);
1✔
667
            return !e.isDirectory() && !entryName.isEmpty() && !isFileToSkip(entryName);
1✔
668
        }).toList();
1✔
669
        return entries;
1✔
670
    }
671

672
    private static ZipFile openZipFile(Path tempFile, Charset charset) throws IOException {
673
        if (charset != null) {
1✔
UNCOV
674
            return new ZipFile(tempFile.toFile(), charset);
×
675
        }
676
        else {
677
            return new ZipFile(tempFile.toFile());
1✔
678
        }
679
    }
680

681
    private static boolean isFileToSkip(String fileName) {
682
        // check if it's a "fake" file - a zip archive entry
683
        // created for a MacOS X filesystem element: (these
684
        // start with "._")
685
        var shortName = getShortName(fileName);
1✔
686
        return shortName.startsWith("._") || shortName.startsWith(".DS_Store") || "".equals(shortName);
1✔
687
    }
688

689
    private static String getShortName(String fileName) {
690
        return fileName.replaceFirst("^.*[\\/]", "");
1✔
691
    }
692

693
    @Override
694
    public Map<String, Set<Permission>> getRequiredPermissions() {
UNCOV
695
        Map<String, Set<Permission>> ret = new HashMap<>();
×
696

697
        ret.put("", new HashSet<>());
×
698
        
699
        if (parentDataverse != null) {
×
700
            // The command is called in the context of uploading files on 
701
            // create of a new dataset
702
            ret.get("").add(Permission.AddDataset);
×
703
        } else {
704
            // An existing dataset
UNCOV
705
            ret.get("").add(Permission.EditDataset);
×
706
        }
707

708
        return ret;
×
709
    }
710
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc