• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #22693

03 Jul 2024 01:09PM CUT coverage: 20.626% (-0.09%) from 20.716%
#22693

push

github

web-flow
Merge pull request #10664 from IQSS/develop

merge develop into master for 6.3

195 of 1852 new or added lines in 82 files covered. (10.53%)

72 existing lines in 33 files now uncovered.

17335 of 84043 relevant lines covered (20.63%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

29.14
/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
1
/*
2
   Copyright (C) 2005-2012, by the President and Fellows of Harvard College.
3

4
   Licensed under the Apache License, Version 2.0 (the "License");
5
   you may not use this file except in compliance with the License.
6
   You may obtain a copy of the License at
7

8
         http://www.apache.org/licenses/LICENSE-2.0
9

10
   Unless required by applicable law or agreed to in writing, software
11
   distributed under the License is distributed on an "AS IS" BASIS,
12
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
   See the License for the specific language governing permissions and
14
   limitations under the License.
15

16
   Dataverse Network - A web application to share, preserve and analyze research data.
17
   Developed at the Institute for Quantitative Social Science, Harvard University.
18
   Version 3.0.
19
*/
20

21
package edu.harvard.iq.dataverse.util;
22

23

24
import edu.harvard.iq.dataverse.*;
25
import edu.harvard.iq.dataverse.DataFile.ChecksumType;
26
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
27
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
28
import edu.harvard.iq.dataverse.dataaccess.S3AccessIO;
29
import edu.harvard.iq.dataverse.dataset.DatasetThumbnail;
30
import edu.harvard.iq.dataverse.dataset.DatasetUtil;
31
import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException;
32

33
import static edu.harvard.iq.dataverse.api.ApiConstants.DS_VERSION_DRAFT;
34
import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable;
35
import edu.harvard.iq.dataverse.ingest.IngestReport;
36
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
37
import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper;
38
import edu.harvard.iq.dataverse.ingest.IngestableDataChecker;
39
import edu.harvard.iq.dataverse.license.License;
40
import edu.harvard.iq.dataverse.settings.ConfigCheckService;
41
import edu.harvard.iq.dataverse.settings.JvmSettings;
42
import edu.harvard.iq.dataverse.util.file.BagItFileHandler;
43
import edu.harvard.iq.dataverse.util.file.CreateDataFileResult;
44
import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory;
45
import edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil;
46
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatDoc;
47
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.HTML_H1;
48
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.HTML_TABLE_HDR;
49
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTitle;
50
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTable;
51
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTableCell;
52
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatLink;
53
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTableCellAlignRight;
54
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTableRow;
55

56
import java.io.BufferedInputStream;
57
import java.io.File;
58
import java.io.FileInputStream;
59
import java.io.FileNotFoundException;
60
import java.io.FileOutputStream;
61
import java.io.FileReader;
62
import java.io.IOException;
63
import java.io.InputStream;
64
import java.io.OutputStream;
65
import java.nio.charset.Charset;
66
import java.nio.file.Files;
67
import java.nio.file.Path;
68
import java.nio.file.Paths;
69
import java.nio.file.StandardCopyOption;
70
import java.security.MessageDigest;
71
import java.security.NoSuchAlgorithmException;
72
import java.sql.Timestamp;
73
import java.text.MessageFormat;
74
import java.text.SimpleDateFormat;
75
import java.time.LocalDate;
76
import java.util.Map;
77
import java.util.MissingResourceException;
78
import java.util.ArrayList;
79
import java.util.Date;
80
import java.util.HashMap;
81
import java.util.List;
82
import java.util.Optional;
83
import java.util.ResourceBundle;
84
import java.util.UUID;
85
import java.util.logging.Level;
86
import java.util.logging.Logger;
87
import jakarta.activation.MimetypesFileTypeMap;
88
import jakarta.ejb.EJBException;
89
import jakarta.enterprise.inject.spi.CDI;
90
import jakarta.json.JsonArray;
91
import jakarta.json.JsonObject;
92
import javax.xml.stream.XMLStreamConstants;
93
import javax.xml.stream.XMLStreamException;
94
import javax.xml.stream.XMLStreamReader;
95

96

97
import java.util.zip.GZIPInputStream;
98
import org.apache.commons.io.FilenameUtils;
99

100
import edu.harvard.iq.dataverse.dataaccess.DataAccessOption;
101
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
102
import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException;
103
import java.util.Arrays;
104
import org.apache.commons.io.IOUtils;
105
import org.apache.commons.lang3.StringUtils;
106
import ucar.nc2.NetcdfFile;
107
import ucar.nc2.NetcdfFiles;
108

109
/**
110
 * a 4.0 implementation of the DVN FileUtil;
111
 * it provides some of the functionality from the 3.6 implementation, 
112
 * but the old code is ported creatively on the method-by-method basis.
113
 * 
114
 * @author Leonid Andreev
115
 */
116
public class FileUtil implements java.io.Serializable  {
117
    private static final Logger logger = Logger.getLogger(FileUtil.class.getCanonicalName());
1✔
118
    
119
    private static final String[] TABULAR_DATA_FORMAT_SET = {"POR", "SAV", "DTA", "RDA"};
1✔
120
    
121
    private static Map<String, String> STATISTICAL_FILE_EXTENSION = new HashMap<String, String>();
1✔
122
    
123
    /*
124
     * The following are Stata, SAS and SPSS syntax/control cards: 
125
     * These are recognized as text files (because they are!) so 
126
     * we check all the uploaded "text/plain" files for these extensions, and 
127
     * assign the following types when they are matched;
128
     * Note that these types are only used in the metadata displayed on the 
129
     * dataset page. We don't support ingest on control cards. 
130
     * -- L.A. 4.0 Oct. 2014
131
    */
132
    
133
    static {
134
        STATISTICAL_FILE_EXTENSION.put("do",  "application/x-stata-syntax");
1✔
135
        STATISTICAL_FILE_EXTENSION.put("sas", "application/x-sas-syntax");
1✔
136
        STATISTICAL_FILE_EXTENSION.put("sps", "application/x-spss-syntax");
1✔
137
        STATISTICAL_FILE_EXTENSION.put("csv", "text/csv");
1✔
138
        STATISTICAL_FILE_EXTENSION.put("tsv", "text/tsv");
1✔
139
    }
140
    
141
    private static MimetypesFileTypeMap MIME_TYPE_MAP = new MimetypesFileTypeMap();
1✔
142
    
143
    public static final String MIME_TYPE_STATA   = "application/x-stata";
144
    public static final String MIME_TYPE_STATA13 = "application/x-stata-13";
145
    public static final String MIME_TYPE_STATA14 = "application/x-stata-14";
146
    public static final String MIME_TYPE_STATA15 = "application/x-stata-15";
147
    public static final String MIME_TYPE_RDATA   = "application/x-rlang-transport";
148
    
149
    public static final String MIME_TYPE_CSV     = "text/csv";
150
    public static final String MIME_TYPE_CSV_ALT = "text/comma-separated-values";
151
    public static final String MIME_TYPE_TSV     = "text/tsv";
152
    public static final String MIME_TYPE_TSV_ALT = "text/tab-separated-values";
153
    public static final String MIME_TYPE_XLSX    = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
154
    
155
    public static final String MIME_TYPE_SPSS_SAV = "application/x-spss-sav";
156
    public static final String MIME_TYPE_SPSS_POR = "application/x-spss-por";
157
    
158
    
159
    public static final String MIME_TYPE_FITS  = "application/fits";
160
    
161
    public static final String MIME_TYPE_ZIP   = "application/zip";
162
    
163
    public static final String MIME_TYPE_FITSIMAGE = "image/fits";
164
    // SHAPE file type: 
165
    // this is the only supported file type in the GEO DATA class:
166
    
167
    public static final String MIME_TYPE_GEO_SHAPE = "application/zipped-shapefile";
168
    
169
    public static final String MIME_TYPE_UNDETERMINED_DEFAULT = "application/octet-stream";
170
    public static final String MIME_TYPE_UNDETERMINED_BINARY = "application/binary";
171
    
172
    public static final String SAVED_ORIGINAL_FILENAME_EXTENSION = "orig";
173
    
174
    //Todo - this is the same as MIME_TYPE_TSV_ALT
175
    public static final String MIME_TYPE_INGESTED_FILE = "text/tab-separated-values";
176

177
    public static final String MIME_TYPE_NETCDF = "application/netcdf";
178
    public static final String MIME_TYPE_XNETCDF = "application/x-netcdf";
179
    public static final String MIME_TYPE_HDF5 = "application/x-hdf5";
180
    public static final String MIME_TYPE_RO_CRATE = "application/ld+json; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\"";
181

182
    // File type "thumbnail classes" tags:
183
    
184
    public static final String FILE_THUMBNAIL_CLASS_AUDIO = "audio";
185
    public static final String FILE_THUMBNAIL_CLASS_CODE = "code";
186
    public static final String FILE_THUMBNAIL_CLASS_DOCUMENT = "document";
187
    public static final String FILE_THUMBNAIL_CLASS_ASTRO = "astro";
188
    public static final String FILE_THUMBNAIL_CLASS_IMAGE = "image";
189
    public static final String FILE_THUMBNAIL_CLASS_NETWORK = "network";
190
    public static final String FILE_THUMBNAIL_CLASS_GEOSHAPE = "geodata";
191
    public static final String FILE_THUMBNAIL_CLASS_TABULAR = "tabular";
192
    public static final String FILE_THUMBNAIL_CLASS_VIDEO = "video";
193
    public static final String FILE_THUMBNAIL_CLASS_PACKAGE = "package";
194
    public static final String FILE_THUMBNAIL_CLASS_OTHER = "other";
195
    
196
    // File type facets, as returned by the getFacetFileType() method in this utility: 
197
    
198
    private static final String FILE_FACET_CLASS_ARCHIVE = "Archive";
199
    private static final String FILE_FACET_CLASS_AUDIO = "Audio";
200
    private static final String FILE_FACET_CLASS_CODE = "Code";
201
    private static final String FILE_FACET_CLASS_DATA = "Data";
202
    private static final String FILE_FACET_CLASS_DOCUMENT = "Document";
203
    private static final String FILE_FACET_CLASS_ASTRO = "FITS";
204
    private static final String FILE_FACET_CLASS_IMAGE = "Image";
205
    private static final String FILE_FACET_CLASS_NETWORK = "Network Data";
206
    private static final String FILE_FACET_CLASS_GEOSHAPE = "Shape";
207
    private static final String FILE_FACET_CLASS_TABULAR = "Tabular Data";
208
    private static final String FILE_FACET_CLASS_VIDEO = "Video";
209
    private static final String FILE_FACET_CLASS_TEXT = "Text";
210
    private static final String FILE_FACET_CLASS_OTHER = "Other";
211
    private static final String FILE_FACET_CLASS_UNKNOWN = "Unknown";
212

213
    // The file type facets and type-specific thumbnail classes (above) are
214
    // very similar, but not exactly 1:1; so the following map is for 
215
    // maintaining the relationship between the two:
216
    
217
    public static Map<String, String> FILE_THUMBNAIL_CLASSES = new HashMap<String, String>();
1✔
218
    
219
    static {
220
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_VIDEO, FILE_THUMBNAIL_CLASS_VIDEO);
1✔
221
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_AUDIO, FILE_THUMBNAIL_CLASS_AUDIO);
1✔
222
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_CODE, FILE_THUMBNAIL_CLASS_CODE);
1✔
223
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_DATA, FILE_THUMBNAIL_CLASS_TABULAR);
1✔
224
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_NETWORK, FILE_THUMBNAIL_CLASS_NETWORK);
1✔
225
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_ASTRO, FILE_THUMBNAIL_CLASS_ASTRO);
1✔
226
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_IMAGE, FILE_THUMBNAIL_CLASS_IMAGE);
1✔
227
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_DOCUMENT, FILE_THUMBNAIL_CLASS_DOCUMENT);
1✔
228
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_GEOSHAPE, FILE_THUMBNAIL_CLASS_GEOSHAPE);
1✔
229
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_TABULAR, FILE_THUMBNAIL_CLASS_TABULAR);
1✔
230
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_TEXT, FILE_THUMBNAIL_CLASS_DOCUMENT);
1✔
231
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_OTHER, FILE_THUMBNAIL_CLASS_OTHER);
1✔
232
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_UNKNOWN, FILE_THUMBNAIL_CLASS_OTHER);
1✔
233
        FILE_THUMBNAIL_CLASSES.put(FILE_FACET_CLASS_ARCHIVE, FILE_THUMBNAIL_CLASS_PACKAGE);
1✔
234
    }
235
    
236
    private static final String FILE_LIST_DATE_FORMAT = "d-MMMM-yyyy HH:mm";
237

238
    /**
239
     * This string can be prepended to a Base64-encoded representation of a PNG
240
     * file in order to embed an image directly into an HTML page using the
241
     * "img" tag. See also https://en.wikipedia.org/wiki/Data_URI_scheme
242
     */
243
    public static String DATA_URI_SCHEME = "data:image/png;base64,";
1✔
244

245
    public FileUtil() {
×
246
    }
×
247
    
248
   
249
    public static String getFileExtension(String fileName){
250
        String ext = null;
1✔
251
        if ( fileName.lastIndexOf(".") != -1){
1✔
252
            ext = (fileName.substring( fileName.lastIndexOf(".") + 1 )).toLowerCase();
1✔
253
        }
254
        return ext;
1✔
255
    } 
256

257
    public static String replaceExtension(String originalName) {
258
       return replaceExtension(originalName, "tab");
×
259
    }   
260
    
261
    public static String replaceExtension(String originalName, String newExtension) {
262
        int extensionIndex = originalName.lastIndexOf(".");
1✔
263
        if (extensionIndex != -1 ) {
1✔
264
            return originalName.substring(0, extensionIndex) + "."+newExtension ;
1✔
265
        } else {
266
            return originalName +"."+newExtension ;
1✔
267
        }
268
    }
269
    
270
    public static String getUserFriendlyFileType(DataFile dataFile) {
271
        String fileType = dataFile.getContentType();
1✔
272
         
273
        if (fileType != null) {
1✔
274
            if (fileType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)){
1✔
275
                return ShapefileHandler.SHAPEFILE_FILE_TYPE_FRIENDLY_NAME;
×
276
            }
277
            try {
278
                return BundleUtil.getStringFromPropertyFile(fileType,"MimeTypeDisplay" );
1✔
279
            } catch (MissingResourceException e) {
1✔
280
                //NOOP: we will try again after trimming ";"
281
            }
282
            if (fileType.contains(";")) {
1✔
283
                fileType = fileType.substring(0, fileType.indexOf(";"));
1✔
284
            }
285
            try {
286
                return BundleUtil.getStringFromPropertyFile(fileType,"MimeTypeDisplay" );
1✔
287
            } catch (MissingResourceException e) {
×
288
                return fileType;
×
289
            }
290
        }
291

292
        return fileType;
1✔
293
    }
294

295
    public static String getIndexableFacetFileType(DataFile dataFile) {
296
        try {
297
            return BundleUtil.getStringFromDefaultPropertyFile(dataFile.getContentType(),"MimeTypeFacets" );
1✔
298
        } catch (MissingResourceException e) {
1✔
299
            //NOOP: we will try again after trimming ";"
300
        }
301
        String fileType = getFileType(dataFile);
1✔
302
        try {
303
            return BundleUtil.getStringFromDefaultPropertyFile(fileType,"MimeTypeFacets"  );
1✔
304
        } catch (MissingResourceException ex) {
×
305
            // if there's no defined "facet-friendly" form of this mime type
306
            // we'll truncate the available type by "/", e.g., all the
307
            // unknown image/* types will become "image".
308
            // Since many other, quite different types would then all become
309
            // "application" - we will use the facet "Other" for all the
310
            // application/* types not specifically defined in the properties file.
311
            //
312
            // UPDATE, MH 4.9.2
313
            // Since production is displaying both "tabulardata" and "Tabular Data"
314
            // we are going to try to add capitalization here to this function
315
            // in order to capitalize all the unknown types that are not called
316
            // out in MimeTypeFacets.properties
317

318
            if (!StringUtil.isEmpty(fileType)) {
×
319
                String typeClass = fileType.split("/")[0];
×
320
                if ("application".equalsIgnoreCase(typeClass)) {
×
321
                    return FILE_FACET_CLASS_OTHER;
×
322
                }
323

324
                return Character.toUpperCase(typeClass.charAt(0)) + typeClass.substring(1);
×
325
            } else {
326
                return null;
×
327
            }
328
        }
329
    }
330

331
    public static String getFileType(DataFile dataFile) {
332
        String fileType = dataFile.getContentType();
1✔
333

334
        if (!StringUtil.isEmpty(fileType)) {
1✔
335
            if (fileType.contains(";")) {
1✔
336
                fileType = fileType.substring(0, fileType.indexOf(";"));
1✔
337
            }
338
            return fileType;
1✔
339
        } else {
340
            return "application/octet-stream";
1✔
341
        }
342

343
    }
344

345
    public static String getFacetFileType(DataFile dataFile) {
346
        String fileType = getFileType(dataFile);
1✔
347
        try {
348
            return BundleUtil.getStringFromPropertyFile(fileType,"MimeTypeFacets"  );
1✔
349
        } catch (MissingResourceException ex) {
1✔
350
            // if there's no defined "facet-friendly" form of this mime type
351
            // we'll truncate the available type by "/", e.g., all the
352
            // unknown image/* types will become "image".
353
            // Since many other, quite different types would then all become
354
            // "application" - we will use the facet "Other" for all the
355
            // application/* types not specifically defined in the properties file.
356
            //
357
            // UPDATE, MH 4.9.2
358
            // Since production is displaying both "tabulardata" and "Tabular Data"
359
            // we are going to try to add capitalization here to this function
360
            // in order to capitalize all the unknown types that are not called
361
            // out in MimeTypeFacets.properties
362

363
            if (!StringUtil.isEmpty(fileType)) {
1✔
364
                String typeClass = fileType.split("/")[0];
1✔
365
                if ("application".equalsIgnoreCase(typeClass)) {
1✔
366
                    return FILE_FACET_CLASS_OTHER;
×
367
                }
368

369
                return Character.toUpperCase(typeClass.charAt(0)) + typeClass.substring(1);
1✔
370
            }
371
            else
372
            {
373
                return  null;
×
374
            }
375
        }
376
    }
377
    
378
    public static String getUserFriendlyOriginalType(DataFile dataFile) {
379
        if (!dataFile.isTabularData()) {
1✔
380
            return null; 
×
381
        }
382
        
383
        String fileType = dataFile.getOriginalFileFormat();
1✔
384
         
385
        if (fileType != null && !fileType.equals("")) {
1✔
386
            if (fileType.contains(";")) {
×
387
                fileType = fileType.substring(0, fileType.indexOf(";"));
×
388
            }
389
            try {
390
                return BundleUtil.getStringFromPropertyFile(fileType,"MimeTypeDisplay" );
×
391
            } catch (MissingResourceException e) {
×
392
                return fileType;
×
393
            }
394
        } 
395
        
396
        return "UNKNOWN";
1✔
397
    }
398
    
399
    /**
400
     *  Returns a content type string for a FileObject
401
     * 
402
     */
403
    public static String determineContentType(File fileObject) {
404
        if (fileObject==null){
×
405
            return null;
×
406
        }
407
        String contentType;
408
        try {
409
            contentType = determineFileType(fileObject, fileObject.getName());
×
410
        } catch (Exception ex) {
×
411
            logger.warning("FileUtil.determineFileType failed for file with name: " + fileObject.getName());
×
412
            contentType = null;
×
413
        }
×
414

415
       if ((contentType==null)||(contentType.equals(""))){
×
416
            contentType = MIME_TYPE_UNDETERMINED_DEFAULT;
×
417
       }
418
       return contentType;
×
419
        
420
    }
421
    
422
    public static String retestIngestableFileType(File file, String fileType) {
423
        IngestableDataChecker tabChecker = new IngestableDataChecker(TABULAR_DATA_FORMAT_SET);
×
424
        String newType = tabChecker.detectTabularDataFormat(file);
×
425
        
426
        return newType != null ? newType : fileType;
×
427
    }
428
    
429
    public static String determineFileType(File f, String fileName) throws IOException{
430
        String fileType = lookupFileTypeByFileName(fileName);
1✔
431
        if (fileType != null) {
1✔
432
            return fileType;
1✔
433
        }
434
        String fileExtension = getFileExtension(fileName);
1✔
435
        
436
        
437
        
438
        // step 1: 
439
        // Apply our custom methods to try and recognize data files that can be 
440
        // converted to tabular data, or can be parsed for extra metadata 
441
        // (such as FITS).
442
        logger.fine("Attempting to identify potential tabular data files;");
1✔
443
        IngestableDataChecker tabChk = new IngestableDataChecker(TABULAR_DATA_FORMAT_SET);
1✔
444
        
445
        fileType = tabChk.detectTabularDataFormat(f);
1✔
446
        
447
        logger.fine("determineFileType: tabular data checker found "+fileType);
1✔
448
                
449
        // step 2: If not found, check if graphml or FITS
450
        if (fileType==null) {
1✔
451
            if (isGraphMLFile(f))  {
1✔
452
                fileType = "text/xml-graphml";
×
453
            } else // Check for FITS:
454
            // our check is fairly weak (it appears to be hard to really
455
            // really recognize a FITS file without reading the entire 
456
            // stream...), so in version 3.* we used to nsist on *both* 
457
            // the ".fits" extension and the header check;
458
            // in 4.0, we'll accept either the extension, or the valid 
459
            // magic header:
460
            if (isFITSFile(f) || (fileExtension != null
1✔
461
                    && fileExtension.equalsIgnoreCase("fits"))) {
1✔
462
                fileType = "application/fits";
×
463
            }
464
        }
465

466
        // step 3a: Check if NetCDF or HDF5
467
        if (fileType == null) {
1✔
468
            fileType = checkNetcdfOrHdf5(f);
1✔
469
        }
470
       
471
        // step 3: check the mime type of this file with Jhove
472
        if (fileType == null){
1✔
473
            JhoveFileType jw = new JhoveFileType();
1✔
474
            String mimeType = jw.getFileMimeType(f);
1✔
475
            if (mimeType != null) {
1✔
476
                fileType = mimeType;
×
477
            }
478
        }
479
        
480
        // step 4: 
481
        // Additional processing; if we haven't gotten much useful information 
482
        // back from Jhove, we'll try and make an educated guess based on 
483
        // the file name and extension:
484

485
        if ( fileExtension != null) {
1✔
486
            logger.fine("fileExtension="+fileExtension);
1✔
487

488
            if (fileType == null || fileType.startsWith("text/plain") || "application/octet-stream".equals(fileType)) {
1✔
489
                if (fileType != null && fileType.startsWith("text/plain") && STATISTICAL_FILE_EXTENSION.containsKey(fileExtension)) {
1✔
490
                    fileType = STATISTICAL_FILE_EXTENSION.get(fileExtension);
×
491
                } else {
492
                    fileType = lookupFileTypeByExtension(fileName);
1✔
493
                }
494

495
                logger.fine("mime type recognized by extension: "+fileType);
1✔
496
            }
497
        } else {
498
            logger.fine("fileExtension is null");
1✔
499
            final String fileTypeByExtension = lookupFileTypeByExtensionFromPropertiesFile(fileName);
1✔
500
            if(!StringUtil.isEmpty(fileTypeByExtension)) {
1✔
NEW
501
                logger.fine(String.format("mime type: %s recognized by extension: %s", fileTypeByExtension, fileName));
×
NEW
502
                fileType = fileTypeByExtension;
×
503
            }
504
        }
505
        
506
        // step 5: 
507
        // if this is a compressed file - zip or gzip - we'll check the 
508
        // file(s) inside the compressed stream and see if it's one of our
509
        // recognized formats that we want to support compressed:
510

511
        if ("application/x-gzip".equals(fileType)) {
1✔
512
            logger.fine("we'll run additional checks on this gzipped file.");
1✔
513
            try (FileInputStream gzippedIn = new FileInputStream(f);
1✔
514
                     InputStream uncompressedIn = new GZIPInputStream(gzippedIn)) {
1✔
515
                if (isFITSFile(uncompressedIn)) {
1✔
516
                    fileType = "application/fits-gzipped";
1✔
517
                }
518
            } catch (IOException ioex) {
×
NEW
519
                logger.warning("IOException while processing gzipped FITS file: " + ioex.getMessage());
×
520
            }
1✔
521
        }
522
        if ("application/zip".equals(fileType)) {
1✔
523
            
524
            // Is this a zipped Shapefile?
525
            // Check for shapefile extensions as described here: http://en.wikipedia.org/wiki/Shapefile
526
            //logger.info("Checking for shapefile");
527

528
            ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(f));
×
529
             if (shp_handler.containsShapefile()){
×
530
              //  logger.info("------- shapefile FOUND ----------");
531
                 fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE; //"application/zipped-shapefile";
×
532
             }
533

534
            Optional<BagItFileHandler> bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler();
×
535
             if(bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) {
×
536
                 fileType = BagItFileHandler.FILE_TYPE;
×
537
             }
538
        } 
539
        
540
        if(fileType==null) {
1✔
541
            fileType = MIME_TYPE_UNDETERMINED_DEFAULT;
1✔
542
        }
543
        logger.fine("returning fileType "+fileType);
1✔
544
        return fileType;
1✔
545
    }
546

547
    public static String determineFileTypeByNameAndExtension(final String fileName) {
NEW
548
        final String fileType = lookupFileTypeByFileName(fileName);
×
NEW
549
        if (fileType != null) {
×
NEW
550
            return fileType;
×
551
        }
NEW
552
        return lookupFileTypeByExtension(fileName);
×
553
    }
554

555
    private static String lookupFileTypeByExtension(final String fileName) {
556
        final String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName);
1✔
557
        logger.fine("MimetypesFileTypeMap type by extension, for " + fileName + ": " + mimetypesFileTypeMapResult);
1✔
558
        if (mimetypesFileTypeMapResult == null) {
1✔
559
            return null;
×
560
        }
561
        if ("application/octet-stream".equals(mimetypesFileTypeMapResult)) {
1✔
NEW
562
            return lookupFileTypeByExtensionFromPropertiesFile(fileName);
×
563
        }
564
        return mimetypesFileTypeMapResult;
1✔
565
    }
566

567
    private static String lookupFileTypeByFileName(final String fileName) {
568
        return lookupFileTypeFromPropertiesFile("MimeTypeDetectionByFileName", fileName);
1✔
569
    }
570

571
    private static String lookupFileTypeByExtensionFromPropertiesFile(final String fileName) {
572
        final String fileKey = FilenameUtils.getExtension(fileName);
1✔
573
        return lookupFileTypeFromPropertiesFile("MimeTypeDetectionByFileExtension", fileKey);
1✔
574
    }
575

576
    private static String lookupFileTypeFromPropertiesFile(final String propertyFileName, final String fileKey) {
577
        final String propertyFileNameOnDisk =  propertyFileName + ".properties";
1✔
578
        try {
579
            logger.fine("checking " + propertyFileNameOnDisk + " for file key " + fileKey);
1✔
580
            return BundleUtil.getStringFromPropertyFile(fileKey, propertyFileName);
1✔
581
        } catch (final MissingResourceException ex) {
1✔
582
            logger.info(fileKey + " is a filename/extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file.");
1✔
583
            return null;
1✔
584
        }
585
    }
586

587
    /* 
588
     * Custom method for identifying FITS files: 
589
     * TODO: 
590
     * the existing check for the "magic header" is very weak (see below); 
591
     * it should probably be replaced by attempting to parse and read at 
592
     * least the primary HDU, using the NOM fits parser. 
593
     * -- L.A. 4.0 alpha
594
    */
595
    private static boolean isFITSFile(File file) {
596

597
        try (BufferedInputStream ins = new BufferedInputStream(new FileInputStream(file))) {
1✔
598
            return isFITSFile(ins);
1✔
599
        } catch (IOException ex) {
×
600
            logger.fine("IOException: "+ ex.getMessage());
×
601
        } 
602
        
603
        return false;
×
604
    }
605
     
606
    private static boolean isFITSFile(InputStream ins) {
607
        boolean isFITS = false;
1✔
608

609
        // number of header bytes read for identification: 
610
        int magicWordLength = 6;
1✔
611
        String magicWord = "SIMPLE";
1✔
612

613
        try {
614
            byte[] b = new byte[magicWordLength];
1✔
615
            logger.fine("attempting to read "+magicWordLength+" bytes from the FITS format candidate stream.");
1✔
616
            if (ins.read(b, 0, magicWordLength) != magicWordLength) {
1✔
617
                throw new IOException();
×
618
            }
619

620
            if (magicWord.equals(new String(b))) {
1✔
621
                logger.fine("yes, this is FITS file!");
1✔
622
                isFITS = true;
1✔
623
            }
624
        } catch (IOException ex) {
×
625
            isFITS = false; 
×
626
        } finally {
627
            if (ins != null) {
1✔
628
                try {
629
                    ins.close();
1✔
630
                } catch (Exception e) {
×
631
                }
1✔
632
            }
633
        }
634
    
635
        return isFITS;
1✔
636
    }
637
    
638
    private static boolean isGraphMLFile(File file) {
639
        boolean isGraphML = false;
1✔
640
        logger.fine("begin isGraphMLFile()");
1✔
641
        FileReader fileReader = null;
1✔
642
        try{
643
            fileReader = new FileReader(file);
1✔
644
            javax.xml.stream.XMLInputFactory xmlif = javax.xml.stream.XMLInputFactory.newInstance();
1✔
645
            xmlif.setProperty("javax.xml.stream.isCoalescing", java.lang.Boolean.TRUE);
1✔
646

647
            XMLStreamReader xmlr = xmlif.createXMLStreamReader(fileReader);
1✔
648
            for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) {
×
649
                if (event == XMLStreamConstants.START_ELEMENT) {
×
650
                    if (xmlr.getLocalName().equals("graphml")) {
×
651
                        String schema = xmlr.getAttributeValue("http://www.w3.org/2001/XMLSchema-instance", "schemaLocation");
×
652
                        logger.fine("schema = "+schema);
×
653
                        if (schema!=null && schema.contains("http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd")){
×
654
                            logger.fine("graphML is true");
×
655
                            isGraphML = true;
×
656
                        }
657
                    }
×
658
                    break;
659
                }
660
            }
661
        } catch(XMLStreamException e) {
1✔
662
            logger.fine("XML error - this is not a valid graphML file.");
1✔
663
            isGraphML = false;
1✔
664
        } catch(IOException e) {
×
665
            throw new EJBException(e);
×
666
        } finally {
667
            if (fileReader != null) {
1✔
668
                try {
669
                    fileReader.close();
1✔
670
                } catch (IOException ioex) {
×
671
                    logger.warning("IOException closing file reader in GraphML type checker");
×
672
                }
1✔
673
            }
674
        }
675
        logger.fine("end isGraphML()");
1✔
676
        return isGraphML;
1✔
677
    }
678

679
    public static String checkNetcdfOrHdf5(File file) {
680
        try ( NetcdfFile netcdfFile = NetcdfFiles.open(file.getAbsolutePath())) {
1✔
681
            if (netcdfFile == null) {
1✔
682
                // Can't open as a NetCDF or HDF5 file.
683
                return null;
×
684
            }
685
            String type = netcdfFile.getFileTypeId();
1✔
686
            if (type == null) {
1✔
687
                return null;
×
688
            }
689
            switch (type) {
1✔
690
                case "NetCDF":
691
                    return "application/netcdf";
1✔
692
                case "NetCDF-4":
693
                    return "application/netcdf";
×
694
                case "HDF5":
695
                    return "application/x-hdf5";
1✔
696
                default:
697
                    break;
698
            }
699
        } catch (IOException ex) {
1✔
700
            /**
701
             * When an HDF4 file is passed, it won't be detected. Instead, we've
702
             * seen exceptions like this:
703
             *
704
             * ucar.nc2.internal.iosp.hdf4.H4header makeDimension WARNING:
705
             * **dimension length=0 for TagVGroup= *refno=124 tag= VG (1965)
706
             * Vgroup length=28 class= Dim0.0 name= ixx using data 123
707
             *
708
             * java.lang.IllegalArgumentException: Dimension length =0 must be >
709
             * 0
710
             */
711
            return null;
1✔
712
        }
×
713
        return null;
×
714
    }
715

716
    // from MD5Checksum.java
717
    public static String calculateChecksum(String datafile, ChecksumType checksumType) {
718

719
        FileInputStream fis = null;
×
720
        try {
721
            fis = new FileInputStream(datafile);
×
722
        } catch (FileNotFoundException ex) {
×
723
            throw new RuntimeException(ex);
×
724
        }
×
725

726
        return FileUtil.calculateChecksum(fis, checksumType);
×
727
    }
728

729
    // from MD5Checksum.java
730
    public static String calculateChecksum(InputStream in, ChecksumType checksumType) {
731
        MessageDigest md = null;
×
732
        try {
733
            // Use "SHA-1" (toString) rather than "SHA1", for example.
734
            md = MessageDigest.getInstance(checksumType.toString());
×
735
        } catch (NoSuchAlgorithmException e) {
×
736
            throw new RuntimeException(e);
×
737
        }
×
738

739
        byte[] dataBytes = new byte[1024];
×
740

741
        int nread;
742
        try {
743
            while ((nread = in.read(dataBytes)) != -1) {
×
744
                md.update(dataBytes, 0, nread);
×
745
            }
746
        } catch (IOException ex) {
×
747
            throw new RuntimeException(ex);
×
748
        } finally {
749
            try {
750
                in.close();
×
751
            } catch (Exception e) {
×
752
            }
×
753
        }
754

755
        return checksumDigestToString(md.digest());
×
756
    }
757
    
758
    public static String calculateChecksum(byte[] dataBytes, ChecksumType checksumType) {
759
        MessageDigest md = null;
×
760
        try {
761
            // Use "SHA-1" (toString) rather than "SHA1", for example.
762
            md = MessageDigest.getInstance(checksumType.toString());
×
763
        } catch (NoSuchAlgorithmException e) {
×
764
            throw new RuntimeException(e);
×
765
        }
×
766

767
        md.update(dataBytes);
×
768

769
        return checksumDigestToString(md.digest());
×
770
        
771
    }
772
    
773
    public static String checksumDigestToString(byte[] digestBytes) {
774
        StringBuilder sb = new StringBuilder("");
×
775
        for (int i = 0; i < digestBytes.length; i++) {
×
776
            sb.append(Integer.toString((digestBytes[i] & 0xff) + 0x100, 16).substring(1));
×
777
        }
778
        return sb.toString();
×
779
    }
780

781
    public static String generateOriginalExtension(String fileType) {
782
        if (fileType.equalsIgnoreCase("application/x-spss-sav")) {
1✔
783
            return ".sav";
×
784
        } else if (fileType.equalsIgnoreCase("application/x-spss-por")) {
1✔
785
            return ".por";    
×
786
        // in addition to "application/x-stata" we want to support 
787
        // "application/x-stata-13" ... etc.:
788
        } else if (fileType.toLowerCase().startsWith("application/x-stata")) {
1✔
789
            return ".dta";
1✔
790
        } else if (fileType.equalsIgnoreCase("application/x-dvn-csvspss-zip")) {
×
791
            return ".zip";
×
792
        } else if (fileType.equalsIgnoreCase("application/x-dvn-tabddi-zip")) {
×
793
            return ".zip";
×
794
        } else if (fileType.equalsIgnoreCase("application/x-rlang-transport")) {
×
795
            return ".RData";
×
796
        } else if (fileType.equalsIgnoreCase("text/csv") || fileType.equalsIgnoreCase("text/comma-separated-values")) {
×
797
            return ".csv";
×
798
        } else if (fileType.equalsIgnoreCase("text/tsv") || fileType.equalsIgnoreCase("text/tab-separated-values")) {
×
799
            return ".tsv";
×
800
        } else if (fileType.equalsIgnoreCase("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) {
×
801
            return ".xlsx";
×
802
        }
803
        return "";
×
804
    }
805

806
        public static boolean useRecognizedType(String suppliedContentType, String recognizedType) {
807
                // is it any better than the type that was supplied to us,
808
                // if any?
809
                // This is not as trivial a task as one might expect...
810
                // We may need a list of "good" mime types, that should always
811
                // be chosen over other choices available. Maybe it should
812
                // even be a weighed list... as in, "application/foo" should
813
                // be chosen over "application/foo-with-bells-and-whistles".
814

815
                // For now the logic will be as follows:
816
                //
817
                // 1. If the contentType supplied (by the browser, most likely)
818
                // is some form of "unknown", we always discard it in favor of
819
                // whatever our own utilities have determined;
820
                // 2. We should NEVER trust the browser when it comes to the
821
                // following "ingestable" types: Stata, SPSS, R;
822
                // 2a. We are willing to TRUST the browser when it comes to
823
                // the CSV and XSLX ingestable types.
824
                // 3. We should ALWAYS trust our utilities when it comes to
825
                // ingestable types.
826
                if (suppliedContentType == null || suppliedContentType.equals("")
×
827
                                || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_DEFAULT)
×
828
                                || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_BINARY)
×
829
                                || (canIngestAsTabular(suppliedContentType) 
×
830
                                                && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV)
×
831
                                                && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV_ALT)
×
832
                                                && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_XLSX))
×
833
                                || canIngestAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped")
×
834
                                || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)
×
835
                                || recognizedType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE)
×
NEW
836
                                || recognizedType.equals(MIME_TYPE_ZIP)
×
NEW
837
                || recognizedType.equals(MIME_TYPE_RO_CRATE)) {
×
UNCOV
838
                        return true;
×
839
                }
840
                return false;
×
841
        }
842

843
        public static File saveInputStreamInTempFile(InputStream inputStream, Long fileSizeLimit)
844
            throws IOException, FileExceedsMaxSizeException, FileExceedsStorageQuotaException {
845
            return saveInputStreamInTempFile(inputStream, fileSizeLimit, null);
×
846
        }
847
        
848
        public static File saveInputStreamInTempFile(InputStream inputStream, Long fileSizeLimit, Long storageQuotaLimit)
849
            throws IOException, FileExceedsMaxSizeException, FileExceedsStorageQuotaException {
850
        Path tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload");
×
851
        
852
        if (inputStream != null && tempFile != null) {
×
853
            Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
×
854
            
855
            // size check: 
856
            // (note that "no size limit set" = "unlimited")
857
            Long fileSize = tempFile.toFile().length();
×
858
            if (fileSizeLimit != null && fileSize > fileSizeLimit) {
×
859
                try {tempFile.toFile().delete();} catch (Exception ex) {}
×
860
                throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit)));  
×
861
            }
862
            
863
            if (storageQuotaLimit != null && fileSize > storageQuotaLimit) {
×
864
                try {tempFile.toFile().delete();} catch (Exception ex) {}
×
865
                throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(fileSize), bytesToHumanReadable(storageQuotaLimit)));  
×
866
            }
867
            
868
            return tempFile.toFile();
×
869
        }
870
        throw new IOException("Failed to save uploaded file.");
×
871
    }
872
    
873
    /* 
     * This method creates a DataFile; 
     * The bytes from the supplied InputStream have already been saved in the temporary location. 
     * This method should only be called by the upper-level methods that handle 
     * file upload and creation for individual use cases - a single file upload, 
     * an upload of a zip archive that needs to be unpacked and turned into 
     * individual files, etc., and once the file name and mime type have already 
     * been figured out. 
    */
882

883
    public static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String fileName, String contentType, DataFile.ChecksumType checksumType) {
884
        return createSingleDataFile(version, tempFile, null, fileName, contentType, checksumType, null, false);
×
885
    }
886

887
    public static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String storageIdentifier,  String fileName, String contentType, DataFile.ChecksumType checksumType, String checksum) {
888
        return createSingleDataFile(version, tempFile, storageIdentifier, fileName, contentType, checksumType, checksum, false);
×
889
    }
890
    
891
    public static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String storageIdentifier, String fileName, String contentType, DataFile.ChecksumType checksumType, String checksum, boolean addToDataset) {
892

893
        if ((tempFile == null) && (storageIdentifier == null)) {
×
894
            return null;
×
895
        }
896

897
        DataFile datafile = new DataFile(contentType);
×
898
        datafile.setModificationTime(new Timestamp(new Date().getTime()));
×
899
        /**
900
         * @todo Think more about when permissions on files are modified.
901
         * Obviously, here at create time files have some sort of permissions,
902
         * even if these permissions are *implied*, by ViewUnpublishedDataset at
903
         * the dataset level, for example.
904
         */
905
        datafile.setPermissionModificationTime(new Timestamp(new Date().getTime()));
×
906
        FileMetadata fmd = new FileMetadata();
×
907

908
        fmd.setLabel(fileName);
×
909

910
        if (addToDataset) {
×
911
            datafile.setOwner(version.getDataset());
×
912
        }
913
        fmd.setDataFile(datafile);
×
914
        datafile.getFileMetadatas().add(fmd);
×
915
        if (addToDataset) {
×
916
            if (version.getFileMetadatas() == null) {
×
917
                version.setFileMetadatas(new ArrayList<>());
×
918
            }
919
            version.getFileMetadatas().add(fmd);
×
920
            fmd.setDatasetVersion(version);
×
921
            version.getDataset().getFiles().add(datafile);
×
922
        }
923
        if (storageIdentifier == null) {
×
924
            generateStorageIdentifier(datafile);
×
925
            if (!tempFile.renameTo(new File(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier()))) {
×
926
                return null;
×
927
            }
928
        } else {
929
            datafile.setStorageIdentifier(storageIdentifier);
×
930
        }
931

932
        if ((checksum !=null)&&(!checksum.isEmpty())) {
×
933
                datafile.setChecksumType(checksumType);
×
934
            datafile.setChecksumValue(checksum);
×
935
        } else {
936
                try {
937
                        // We persist "SHA1" rather than "SHA-1".
938
                        datafile.setChecksumType(checksumType);
×
939
                        datafile.setChecksumValue(calculateChecksum(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(), datafile.getChecksumType()));
×
940
                } catch (Exception cksumEx) {
×
941
                        logger.warning("Could not calculate " + checksumType + " signature for the new file " + fileName);
×
942
                }
×
943
        }
944
        return datafile;
×
945
    }
946
    
947
    
948
    /**
        For the restructuring of zipped shapefiles, create a timestamped directory.
        This directory is deleted after successful restructuring.
    
        Naming convention: getFilesTempDirectory() + "/shp_" + "yyyy-MM-dd-hh-mm-ss-SSS"
    */
954
    public static File getShapefileUnzipTempDirectory(){
955
        
956
        String tempDirectory = getFilesTempDirectory();
×
957
        if (tempDirectory == null){
×
958
            logger.severe("Failed to retrieve tempDirectory, null was returned" );
×
959
            return null;
×
960
        }
961
        String datestampedFileName =  "shp_" + new SimpleDateFormat("yyyy-MM-dd-hh-mm-ss-SSS").format(new Date());
×
962
        String datestampedFolderName = tempDirectory + "/" + datestampedFileName;
×
963
        
964
        File datestampedFolder = new File(datestampedFolderName);
×
965
        if (!datestampedFolder.isDirectory()) {
×
966
            /* Note that "createDirectories()" must be used - not 
967
             * "createDirectory()", to make sure all the parent 
968
             * directories that may not yet exist are created as well. 
969
             */
970
            try {
971
                Files.createDirectories(Paths.get(datestampedFolderName));
×
972
            } catch (IOException ex) {
×
973
                logger.severe("Failed to create temp. directory to unzip shapefile: " + datestampedFolderName );
×
974
                return null;
×
975
            }
×
976
        }
977
        return datestampedFolder;        
×
978
    }
979
    
980
    public static boolean canIngestAsTabular(DataFile dataFile) {
981
        String mimeType = dataFile.getContentType();
1✔
982
        
983
        return canIngestAsTabular(mimeType);
1✔
984
    } 
985
    
986
    public static boolean canIngestAsTabular(String mimeType) {
987
        /* 
988
         * In the final 4.0 we'll be doing real-time checks, going through the 
989
         * available plugins and verifying the lists of mime types that they 
990
         * can handle. In 4.0 beta, the ingest plugins are still built into the 
991
         * main code base, so we can just go through a hard-coded list of mime 
992
         * types. -- L.A. 
993
         */
994
        
995
        if (mimeType == null) {
1✔
996
            return false;
1✔
997
        }
998
        
999
        switch (mimeType) {
1✔
1000
            case MIME_TYPE_STATA:
1001
            case MIME_TYPE_STATA13:
1002
            case MIME_TYPE_STATA14:
1003
            case MIME_TYPE_STATA15:
1004
            case MIME_TYPE_RDATA:
1005
            case MIME_TYPE_CSV:
1006
            case MIME_TYPE_CSV_ALT:
1007
            case MIME_TYPE_TSV:
1008
            //case MIME_TYPE_TSV_ALT:
1009
            case MIME_TYPE_XLSX:
1010
            case MIME_TYPE_SPSS_SAV:
1011
            case MIME_TYPE_SPSS_POR:
1012
                return true;
×
1013
            default:
1014
                return false;
1✔
1015
        }
1016
    }
1017
    
1018
    /**
1019
     * Return the location where data should be stored temporarily after uploading (UI or API)
1020
     * for local processing (ingest, unzip, ...) and transfer to final destination (see storage subsystem).
1021
     *
1022
     * This location is checked to be configured, does exist, and is writeable via
1023
     * {@link ConfigCheckService#checkSystemDirectories()}.
1024
     *
1025
     * @return String with a path to the temporary location. Will not be null (former versions did to indicate failure)
1026
     */
1027
    public static String getFilesTempDirectory() {
1028
        return JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "temp";
1✔
1029
    }
1030
    
1031
    public static void generateS3PackageStorageIdentifier(DataFile dataFile) {
1032
            String driverId = dataFile.getOwner().getEffectiveStorageDriverId();
×
1033
                
1034
        String bucketName = System.getProperty("dataverse.files." + driverId + ".bucket-name");
×
1035
        String storageId = driverId + DataAccess.SEPARATOR + bucketName + ":" + dataFile.getFileMetadata().getLabel();
×
1036
        dataFile.setStorageIdentifier(storageId);
×
1037
    }
×
1038
    
1039
    public static void generateStorageIdentifier(DataFile dataFile) {
1040
            //Is it true that this is only used for temp files and we could safely prepend "tmp://" to indicate that?
1041
        dataFile.setStorageIdentifier(generateStorageIdentifier());
×
1042
    }
×
1043
    
1044
    public static String generateStorageIdentifier() {
1045
        
1046
        UUID uid = UUID.randomUUID();
1✔
1047
                
1048
        logger.log(Level.FINE, "UUID value: {0}", uid.toString());
1✔
1049
        
1050
        // last 6 bytes, of the random UUID, in hex: 
1051
        
1052
        String hexRandom = uid.toString().substring(24);
1✔
1053
        
1054
        logger.log(Level.FINE, "UUID (last 6 bytes, 12 hex digits): {0}", hexRandom);
1✔
1055
        
1056
        String hexTimestamp = Long.toHexString(new Date().getTime());
1✔
1057
        
1058
        logger.log(Level.FINE, "(not UUID) timestamp in hex: {0}", hexTimestamp);
1✔
1059
            
1060
        String storageIdentifier = hexTimestamp + "-" + hexRandom;
1✔
1061
        
1062
        logger.log(Level.FINE, "timestamp/UUID hybrid: {0}", storageIdentifier);
1✔
1063
        return storageIdentifier; 
1✔
1064
    }
1065
    
1066
    public static void createIngestFailureReport(DataFile dataFile, String message) {
1067
        createIngestReport(dataFile, IngestReport.INGEST_STATUS_FAILURE, message);
×
1068
    }
×
1069
    
1070
    private static void createIngestReport (DataFile dataFile, int status, String message) {
1071
        IngestReport errorReport = new IngestReport();
×
1072
        if (status == IngestReport.INGEST_STATUS_FAILURE) {
×
1073
                errorReport.setFailure();
×
1074
                errorReport.setReport(message);
×
1075
                errorReport.setDataFile(dataFile);
×
1076
                dataFile.setIngestReport(errorReport);
×
1077
        }
1078
    }
×
1079

1080
    public enum FileCitationExtension {
1✔
1081

1082
        ENDNOTE("-endnote.xml"),
1✔
1083
        RIS(".ris"),
1✔
1084
        BIBTEX(".bib");
1✔
1085

1086
        private final String text;
1087

1088
        private FileCitationExtension(final String text) {
1✔
1089
            this.text = text;
1✔
1090
        }
1✔
1091
    }
1092

1093
    public static String getCiteDataFileFilename(String fileTitle, FileCitationExtension fileCitationExtension) {
1094
            if((fileTitle==null) || (fileCitationExtension == null)) {
1✔
1095
                    return null;
1✔
1096
            }
1097
        if (fileTitle.endsWith("tab")) {
1✔
1098
            return fileTitle.replaceAll("\\.tab$", fileCitationExtension.text);
1✔
1099
        } else {
1100
            return fileTitle + fileCitationExtension.text;
1✔
1101
        }
1102
    }
1103

1104
    /**
1105
     * @todo Consider returning not only the boolean but the human readable
1106
     * reason why the popup is required, which could be used in the GUI to
1107
     * elaborate on the text "This file cannot be downloaded publicly."
1108
     */
1109
    public static boolean isDownloadPopupRequired(DatasetVersion datasetVersion) {
1110
        logger.fine("Checking if download popup is required.");
×
1111
        Boolean answer = popupDueToStateOrTerms(datasetVersion);
×
1112
        if (answer != null) {
×
1113
            return answer;
×
1114
        }
1115
        // 3. Guest Book:
1116
        if (datasetVersion.getDataset() != null && datasetVersion.getDataset().getGuestbook() != null && datasetVersion.getDataset().getGuestbook().isEnabled() && datasetVersion.getDataset().getGuestbook().getDataverse() != null) {
×
1117
            logger.fine("Download popup required because of guestbook.");
×
1118
            return true;
×
1119
        }
1120
        logger.fine("Download popup is not required.");
×
1121
        return false;
×
1122
    }
1123
    
1124
    public static boolean isRequestAccessPopupRequired(DatasetVersion datasetVersion) {
1125
        
1126
        Boolean answer = popupDueToStateOrTerms(datasetVersion);
×
1127
        if (answer != null) {
×
1128
            return answer;
×
1129
        }
1130
     // 3. Guest Book:
1131
        if (datasetVersion.getDataset() != null && datasetVersion.getDataset().getGuestbook() != null && datasetVersion.getDataset().getGuestbook().isEnabled() && datasetVersion.getDataset().getGuestbook().getDataverse() != null) {
×
1132
            logger.fine("Request access popup required because of guestbook.");
×
1133
            return true;
×
1134
        }
1135
        logger.fine("Request access popup is not required.");
×
1136
        return false;
×
1137
    }
1138
    
1139
    /* Code shared by isDownloadPopupRequired and isRequestAccessPopupRequired.
1140
     * 
1141
     * Returns Boolean to allow null = no decision. This allows the isDownloadPopupRequired method to then add another check w.r.t. guestbooks before returning its value.
1142
     * 
1143
     */
1144
    private static Boolean popupDueToStateOrTerms(DatasetVersion datasetVersion) {
1145

1146
        // Each of these conditions is sufficient reason to have to
1147
        // present the user with the popup:
1148
        if (datasetVersion == null) {
×
1149
            logger.fine("Popup not required because datasetVersion is null.");
×
1150
            return false;
×
1151
        }
1152
        // 0. if version is draft then Popup "not required"
1153
        if (!datasetVersion.isReleased()) {
×
1154
            logger.fine("Popup not required because datasetVersion has not been released.");
×
1155
            return false;
×
1156
        }
1157
        // 1. License and Terms of Use:
1158
        if (datasetVersion.getTermsOfUseAndAccess() != null) {
×
1159
            License license = DatasetUtil.getLicense(datasetVersion);
×
1160
            if ((license == null && StringUtils.isNotBlank(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse()))
×
1161
                    || (license != null && !license.isDefault())) {
×
1162
                logger.fine("Popup required because of license or terms of use.");
×
1163
                return true;
×
1164
            }
1165

1166
            // 2. Terms of Access:
1167
            if (!(datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess() == null) && !datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess().equals("")) {
×
1168
                logger.fine("Popup required because of terms of access.");
×
1169
                return true;
×
1170
            }
1171
        }
1172
        //No decision based on the criteria above
1173
        return null;
×
1174
    }
1175

1176
    /**
     * isGuestbookAndTermsPopupRequired
     * meant to replace both isDownloadPopupRequired() and isRequestAccessPopupRequired() when the guestbook-terms-popup-fragment.xhtml
     * replaced file-download-popup-fragment.xhtml and file-request-access-popup-fragment.xhtml
     * @param datasetVersion
     * @return boolean
     */
1183

1184
    public static boolean isGuestbookAndTermsPopupRequired(DatasetVersion datasetVersion) {
1185
        return isGuestbookPopupRequired(datasetVersion) || isTermsPopupRequired(datasetVersion);
×
1186
    }
1187

1188
    public static boolean isGuestbookPopupRequired(DatasetVersion datasetVersion) {
1189

1190
        if (datasetVersion == null) {
×
1191
            logger.fine("GuestbookPopup not required because datasetVersion is null.");
×
1192
            return false;
×
1193
        }
1194
        //0. if version is draft then Popup "not required"
1195
        if (!datasetVersion.isReleased()) {
×
1196
            logger.fine("GuestbookPopup not required because datasetVersion has not been released.");
×
1197
            return false;
×
1198
        }
1199

1200
        // 3. Guest Book:
1201
        if (datasetVersion.getDataset() != null && datasetVersion.getDataset().getGuestbook() != null && datasetVersion.getDataset().getGuestbook().isEnabled() && datasetVersion.getDataset().getGuestbook().getDataverse() != null) {
×
1202
            logger.fine("GuestbookPopup required because an enabled guestbook exists.");
×
1203
            return true;
×
1204
        }
1205

1206
        logger.fine("GuestbookPopup is not required.");
×
1207
        return false;
×
1208
    }
1209

1210
    public static boolean isTermsPopupRequired(DatasetVersion datasetVersion) {
1211
        Boolean answer = popupDueToStateOrTerms(datasetVersion);
×
1212
        if(answer == null) {
×
1213
            logger.fine("TermsPopup is not required.");
×
1214
            return false;
×
1215
        }
1216
        return answer;
×
1217
    }
1218
    
1219
    /**
1220
     * Provide download URL if no Terms of Use, no guestbook, and not
1221
     * restricted.
1222
     */
1223
    public static boolean isPubliclyDownloadable(FileMetadata fileMetadata) {
1224
        if (fileMetadata == null) {
×
1225
            return false;
×
1226
        }
1227
        if (fileMetadata.isRestricted()) {
×
1228
            String msg = "Not publicly downloadable because the file is restricted.";
×
1229
            logger.fine(msg);
×
1230
            return false;
×
1231
        }
1232
        if (isActivelyEmbargoed(fileMetadata)) {
×
1233
            return false;
×
1234
        }
NEW
1235
        if (isRetentionExpired(fileMetadata)) {
×
NEW
1236
            return false;
×
1237
        }
1238
        boolean popupReasons = isDownloadPopupRequired(fileMetadata.getDatasetVersion());
×
1239
        if (popupReasons == true) {
×
1240
            /**
1241
             * @todo The user clicking publish may have a bad "Dude, where did
1242
             * the file Download URL go" experience in the following scenario:
1243
             *
1244
             * - The user creates a dataset and uploads a file.
1245
             *
1246
             * - The user sets Terms of Use, which means a Download URL should
1247
             * not be displayed.
1248
             *
1249
             * - While the dataset is in draft, the Download URL is displayed
1250
             * due to the rule "Download popup required because datasetVersion
1251
             * has not been released."
1252
             *
1253
             * - Once the dataset is published the Download URL disappears due
1254
             * to the rule "Download popup required because of license or terms
1255
             * of use."
1256
             *
1257
             * In short, the Download URL disappears on publish in the scenario
1258
             * above, which is weird. We should probably attempt to see into the
1259
             * future to when the dataset is published to see if the file will
1260
             * be publicly downloadable or not.
1261
             */
1262
            return false;
×
1263
        }
1264
        return true;
×
1265
    }
1266

1267
    /**
     * This is what the UI displays for "Download URL" on the file landing page
     * (DOIs rather than file IDs).
     */
1271
    public static String getPublicDownloadUrl(String dataverseSiteUrl, String persistentId, Long fileId) {
1272
        String path = null;
×
1273
        if(persistentId != null) {
×
1274
            path = dataverseSiteUrl + "/api/access/datafile/:persistentId?persistentId=" + persistentId;
×
1275
        } else if( fileId != null) {
×
1276
            path = dataverseSiteUrl + "/api/access/datafile/" + fileId;
×
1277
        } else {
1278
            logger.info("In getPublicDownloadUrl but persistentId & fileId are both null!");
×
1279
        }
1280
        return path;
×
1281
    }
1282
    
1283
    /**
1284
     * The FileDownloadServiceBean operates on file IDs, not DOIs.
1285
     */
1286
    public static String getFileDownloadUrlPath(String downloadType, Long fileId, boolean gbRecordsWritten, Long fileMetadataId) {
1287
        String fileDownloadUrl = "/api/access/datafile/" + fileId;
1✔
1288
        if (downloadType != null) {
1✔
1289
            switch(downloadType) {
×
1290
            case "original":
1291
            case"RData":
1292
            case "tab":
1293
            case "GlobusTransfer":
1294
                    fileDownloadUrl = "/api/access/datafile/" + fileId + "?format=" + downloadType;
×
1295
                    break;
×
1296
            case "bundle":
1297
                    if (fileMetadataId == null) {
×
1298
                        fileDownloadUrl = "/api/access/datafile/bundle/" + fileId;
×
1299
                    } else {
1300
                        fileDownloadUrl = "/api/access/datafile/bundle/" + fileId + "?fileMetadataId=" + fileMetadataId;
×
1301
                    }
1302
                    break;
×
1303
            case "var":
1304
                    if (fileMetadataId == null) {
×
1305
                        fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata";
×
1306
                    } else {
1307
                        fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata?fileMetadataId=" + fileMetadataId;
×
1308
                    }
1309
                    break;
1310
                }
1311
                
1312
            }
1313
        if (gbRecordsWritten) {
1✔
1314
            if (fileDownloadUrl.contains("?")) {
×
1315
                fileDownloadUrl += "&gbrecs=true";
×
1316
            } else {
1317
                fileDownloadUrl += "?gbrecs=true";
×
1318
            }
1319
        }
1320
        logger.fine("Returning file download url: " + fileDownloadUrl);
1✔
1321
        return fileDownloadUrl;
1✔
1322
    }
1323

1324
    public static File inputStreamToFile(InputStream inputStream) throws IOException {
1325
        if (inputStream == null) {
×
1326
            logger.info("In inputStreamToFile but inputStream was null! Returning null rather than a File.");
×
1327
            return null;
×
1328
        }
1329
        File file = File.createTempFile(UUID.randomUUID().toString(), UUID.randomUUID().toString());
×
1330
        try(OutputStream outputStream = new FileOutputStream(file)){
×
1331
        int read = 0;
×
1332
        byte[] bytes = new byte[1024];
×
1333
        while ((read = inputStream.read(bytes)) != -1) {
×
1334
            outputStream.write(bytes, 0, read);
×
1335
        }
1336
        return file;
×
1337
        }
1338
    }
1339

1340
    /* 
1341
     * This method tells you if thumbnail generation is *supported* 
1342
     * on this type of file. i.e., if true, it does not guarantee that a thumbnail 
1343
     * can/will be generated; but it means that we can try. 
1344
     */
1345
    public static boolean isThumbnailSupported (DataFile file) {
1346
        if (file == null) {
1✔
1347
            return false;
×
1348
        }
1349
        
1350
        if (file.isHarvested() || StringUtil.isEmpty(file.getStorageIdentifier())) {
1✔
1351
            return false;
×
1352
        }
1353
        
1354
        String contentType = file.getContentType();
1✔
1355
        
1356
        // Some browsers (Chrome?) seem to identify FITS files as mime
1357
        // type "image/fits" on upload; this is both incorrect (the official
1358
        // mime type for FITS is "application/fits", and problematic: then
1359
        // the file is identified as an image, and the page will attempt to 
1360
        // generate a preview - which of course is going to fail...
1361
        if (MIME_TYPE_FITSIMAGE.equalsIgnoreCase(contentType)) {
1✔
1362
            return false;
×
1363
        }
1364
        // besides most image/* types, we can generate thumbnails for
1365
        // pdf and "world map" files:
1366
        
1367
        return (contentType != null && 
1✔
1368
                (contentType.startsWith("image/") || 
1✔
1369
                contentType.equalsIgnoreCase("application/pdf") ||
×
1370
                (file.isTabularData() && file.hasGeospatialTag()) ||
×
1371
                contentType.equalsIgnoreCase(MIME_TYPE_GEO_SHAPE)));
1✔
1372
    }
1373
    
1374
    
1375
    /* 
1376
     * The method below appears to be unnecessary; 
1377
     * it duplicates the method generateImageThumbnailFromFileAsBase64() from ImageThumbConverter;
1378
     * plus it creates an unnecessary temp file copy of the source file.    
1379
    public static String rescaleImage(File file) throws IOException {
1380
        if (file == null) {
1381
            logger.info("file was null!!");
1382
            return null;
1383
        }
1384
        File tmpFile = File.createTempFile("tempFileToRescale", ".tmp");
1385
        BufferedImage fullSizeImage = ImageIO.read(file);
1386
        if (fullSizeImage == null) {
1387
            logger.info("fullSizeImage was null!");
1388
            return null;
1389
        }
1390
        int width = fullSizeImage.getWidth();
1391
        int height = fullSizeImage.getHeight();
1392
        FileChannel src = new FileInputStream(file).getChannel();
1393
        FileChannel dest = new FileOutputStream(tmpFile).getChannel();
1394
        dest.transferFrom(src, 0, src.size());
1395
        String pathToResizedFile = ImageThumbConverter.rescaleImage(fullSizeImage, width, height, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE, tmpFile.getAbsolutePath());
1396
        File resizedFile = new File(pathToResizedFile);
1397
        return ImageThumbConverter.getImageAsBase64FromFile(resizedFile);
1398
    }
1399
    */
1400
    
1401
    public static DatasetThumbnail getThumbnail(DataFile file) {
1402

1403
        String imageSourceBase64 = ImageThumbConverter.getImageThumbnailAsBase64(file, ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE);
×
1404
        DatasetThumbnail defaultDatasetThumbnail = new DatasetThumbnail(imageSourceBase64, file);
×
1405
        return defaultDatasetThumbnail;
×
1406

1407
    }
1408
    
1409
    public static boolean isPackageFile(DataFile dataFile) {
1410
        return DataFileServiceBean.MIME_TYPE_PACKAGE_FILE.equalsIgnoreCase(dataFile.getContentType());
×
1411
    }
1412
    
1413
    public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) {
1414
            String driverId = dataset.getEffectiveStorageDriverId();
×
1415
            boolean directEnabled = Boolean.getBoolean("dataverse.files." + driverId + ".upload-redirect");
×
1416
            //Should only be requested when it is allowed, but we'll log a warning otherwise
1417
            if(!directEnabled) {
×
1418
                    logger.warning("Direct upload not supported for files in this dataset: " + dataset.getId());
×
1419
                    return null;
×
1420
            }
1421
            S3AccessIO<DataFile> s3io = null;
×
1422
            String bucket = System.getProperty("dataverse.files." + driverId + ".bucket-name") + "/";
×
1423
            String sid = null;
×
1424
            int i=0;
×
1425
            while (s3io==null && i<5) {
×
1426
                    sid = bucket+ dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + FileUtil.generateStorageIdentifier();
×
1427
                    try {
1428
                            s3io = new S3AccessIO<DataFile>(sid, driverId);
×
1429
                            if(s3io.exists()) {
×
1430
                                    s3io=null;
×
1431
                                    i=i+1;
×
1432
                            } 
1433

1434
                    } catch (Exception e) {
×
1435
                            i=i+1;
×
1436
                    }
×
1437

1438
            }
1439
            return s3io;
×
1440
    }
1441
    
1442
    private static InputStream getOriginalFileInputStream(StorageIO<DataFile> storage, boolean isTabularData) throws IOException {
1443
        storage.open(DataAccessOption.READ_ACCESS);
×
1444
        if (!isTabularData) {
×
1445
            return storage.getInputStream();
×
1446
        } else {
1447
            // if this is a tabular file, read the preserved original "auxiliary file"
1448
            // instead:
1449
            return storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
×
1450
        }
1451
    }
1452

1453
    public static void validateDataFileChecksum(DataFile dataFile) throws IOException {
1454
        DataFile.ChecksumType checksumType = dataFile.getChecksumType();
×
1455
        if (checksumType == null) {
×
1456
            String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString()));
×
1457
            logger.log(Level.INFO, info);
×
1458
            throw new IOException(info);
×
1459
        }
1460

1461
        StorageIO<DataFile> storage = dataFile.getStorageIO();
×
1462
        String recalculatedChecksum = null;
×
1463

1464
        try (InputStream inputStream = getOriginalFileInputStream(storage, dataFile.isTabularData())) {
×
1465
            recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType);
×
1466
        } catch (IOException ioex) {
×
1467
            String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString()));
×
1468
            logger.log(Level.INFO, info);
×
1469
            throw new IOException(info);
×
1470
        } catch (RuntimeException rte) {
×
1471
            logger.log(Level.SEVERE, "failed to calculated checksum, one retry", rte);
×
1472
            recalculatedChecksum = null;
×
1473
        }
×
1474

1475
        if (recalculatedChecksum == null) { //retry once
×
1476
            storage = dataFile.getStorageIO();
×
1477
            try (InputStream inputStream = getOriginalFileInputStream(storage, dataFile.isTabularData())) {
×
1478
                recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType);
×
1479
            }
1480
        }
1481

1482
        if (recalculatedChecksum == null) {
×
1483
            String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failCalculateChecksum", Arrays.asList(dataFile.getId().toString()));
×
1484
            logger.log(Level.INFO, info);
×
1485
            throw new IOException(info);
×
1486
        }
1487

1488
        // TODO? What should we do if the datafile does not have a non-null checksum?
1489
        // Should we fail, or should we assume that the recalculated checksum
1490
        // is correct, and populate the checksumValue field with it?
1491
        if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) {
×
1492
            // There's one possible condition that is 100% recoverable and can
1493
            // be automatically fixed (issue #6660):
1494
            boolean fixed = false;
×
1495
            if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) {
×
1496
                // try again, see if the .orig file happens to be there:
1497
                try (InputStream in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION)) {
×
1498
                    recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
×
1499
                } catch (RuntimeException rte) {
×
1500
                    recalculatedChecksum = null;
×
1501
                }
×
1502
                if (recalculatedChecksum != null) {
×
1503
                    // try again:
1504
                    if (recalculatedChecksum.equals(dataFile.getChecksumValue())) {
×
1505
                        fixed = true;
×
1506
                        try {
1507
                            storage.revertBackupAsAux(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
×
1508
                        } catch (IOException ioex) {
×
1509
                            fixed = false;
×
1510
                        }
×
1511
                    }
1512
                }
1513
            }
1514

1515
            if (!fixed) {
×
1516
                String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString()));
×
1517
                logger.log(Level.INFO, info);
×
1518
                logger.fine("Expected: " + dataFile.getChecksumValue() +", calculated: " + recalculatedChecksum);
×
1519
                throw new IOException(info);
×
1520
            }
1521
        }
1522

1523
        logger.log(Level.INFO, "successfully validated DataFile {0}; checksum {1}", new Object[]{dataFile.getId(), recalculatedChecksum});
×
1524
    }
×
1525
    
1526
    public static String getStorageIdentifierFromLocation(String location) {
1527
            int driverEnd = location.indexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length();
×
1528
            int bucketEnd = driverEnd + location.substring(driverEnd).indexOf("/");
×
1529
            return location.substring(0,bucketEnd) + ":" + location.substring(location.lastIndexOf("/") + 1);
×
1530
    }
1531
    
1532
    public static void deleteTempFile(DataFile dataFile, Dataset dataset, IngestServiceBean ingestService) {
1533
            logger.info("Deleting " + dataFile.getStorageIdentifier());
×
1534
            // Before we remove the file from the list and forget about 
1535
            // it:
1536
            // The physical uploaded file is still sitting in the temporary
1537
            // directory. If it were saved, it would be moved into its 
1538
            // permanent location. But since the user chose not to save it,
1539
            // we have to delete the temp file too. 
1540
            // 
1541
            // Eventually, we will likely add a dedicated mechanism
1542
            // for managing temp files, similar to (or part of) the storage 
1543
            // access framework, that would allow us to handle specialized
1544
            // configurations - highly sensitive/private data, that 
1545
            // has to be kept encrypted even in temp files, and such. 
1546
            // But for now, we just delete the file directly on the 
1547
            // local filesystem: 
1548

1549
            try {
1550
                    List<Path> generatedTempFiles = ingestService.listGeneratedTempFiles(
×
1551
                                    Paths.get(getFilesTempDirectory()), dataFile.getStorageIdentifier());
×
1552
                    if (generatedTempFiles != null) {
×
1553
                            for (Path generated : generatedTempFiles) {
×
1554
                                    logger.fine("(Deleting generated thumbnail file " + generated.toString() + ")");
×
1555
                                    try {
1556
                                            Files.delete(generated);
×
1557
                                    } catch (IOException ioex) {
×
1558
                                            logger.warning("Failed to delete generated file " + generated.toString());
×
1559
                                    }
×
1560
                            }
×
1561
                    }
1562
                    String si = dataFile.getStorageIdentifier();
×
1563
                    if (si.contains(DataAccess.SEPARATOR)) {
×
1564
                            //Direct upload files will already have a store id in their storageidentifier
1565
                            //but they need to be associated with a dataset for the overall storagelocation to be calculated
1566
                            //so we temporarily set the owner
1567
                            if(dataFile.getOwner()!=null) {
×
1568
                                    logger.warning("Datafile owner was not null as expected");
×
1569
                            }
1570
                            dataFile.setOwner(dataset);
×
1571
                            //Use one StorageIO to get the storageLocation and then create a direct storage storageIO class to perform the delete 
1572
                            // (since delete is forbidden except for direct storage)
1573
                            String sl = DataAccess.getStorageIO(dataFile).getStorageLocation();
×
1574
                            DataAccess.getDirectStorageIO(sl).delete();
×
1575
                    } else {
×
1576
                            //Temp files sent to this method have no prefix, not even "tmp://"
1577
                            Files.delete(Paths.get(FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier()));
×
1578
                    }
1579
            } catch (IOException ioEx) {
×
1580
                    // safe to ignore - it's just a temp file. 
1581
                    logger.warning(ioEx.getMessage());
×
1582
                    if(dataFile.getStorageIdentifier().contains(DataAccess.SEPARATOR)) {
×
1583
                            logger.warning("Failed to delete temporary file " + dataFile.getStorageIdentifier());
×
1584
                    } else {
1585
                            logger.warning("Failed to delete temporary file " + FileUtil.getFilesTempDirectory() + "/"
×
1586
                                            + dataFile.getStorageIdentifier());
×
1587
                    }
1588
            } finally {
1589
                    dataFile.setOwner(null);
×
1590
            }
1591
    }
×
1592
    
1593
    public static boolean isFileAlreadyUploaded(DataFile dataFile, Map checksumMapNew, Map fileAlreadyExists) {
1594
        if (checksumMapNew == null) {
×
1595
            checksumMapNew = new HashMap<>();
×
1596
        }
1597
        
1598
        if (fileAlreadyExists == null) {
×
1599
            fileAlreadyExists = new HashMap<>();
×
1600
        }
1601
        
1602
        String chksum = dataFile.getChecksumValue();
×
1603
        
1604
        if (chksum == null) {
×
1605
            return false;
×
1606
        }
1607
        
1608
        if (checksumMapNew.get(chksum) != null) {
×
1609
            fileAlreadyExists.put(dataFile, checksumMapNew.get(chksum));
×
1610
            return true;
×
1611
        }
1612
        
1613
        checksumMapNew.put(chksum, dataFile);
×
1614
        return false;
×
1615
    }
1616
    
1617
    public static String formatFolderListingHtml(String folderName, DatasetVersion version, String apiLocation, boolean originals) {
1618
        String title = formatTitle("Index of folder /" + folderName);
×
1619
        List<FileMetadata> fileMetadatas = version.getFileMetadatasFolderListing(folderName);
×
1620
        
1621
        if (fileMetadatas == null || fileMetadatas.isEmpty()) {
×
1622
            return "";
×
1623
        }
1624
        
1625
        String persistentId = version.getDataset().getGlobalId().asString();
×
1626
        
1627
        StringBuilder sb = new StringBuilder();
×
1628
        
1629
        String versionTag = version.getFriendlyVersionNumber();
×
1630
        versionTag = "DRAFT".equals(versionTag) ? "Draft Version" : "v. " + versionTag;
×
1631
        sb.append(HtmlFormatUtil.formatTag("Index of folder /" + folderName + 
×
1632
                " in dataset " + persistentId + 
1633
                " (" + versionTag + ")", HTML_H1));
1634
        sb.append("\n");
×
1635
        sb.append(formatFolderListingTableHtml(folderName, fileMetadatas, apiLocation, originals));
×
1636
        
1637
        String body = sb.toString();
×
1638
                 
1639
        return formatDoc(title, body);
×
1640
    }
1641
    
1642
    private static String formatFolderListingTableHtml(String folderName, List<FileMetadata> fileMetadatas, String apiLocation, boolean originals) {
1643
        StringBuilder sb = new StringBuilder(); 
×
1644
        
1645
        sb.append(formatFolderListingTableHeaderHtml());
×
1646
        
1647
        for (FileMetadata fileMetadata : fileMetadatas) {
×
1648
            String localFolder = fileMetadata.getDirectoryLabel() == null ? "" : fileMetadata.getDirectoryLabel(); 
×
1649
            
1650
            if (folderName.equals(localFolder)) {
×
1651
                String accessUrl = getFileAccessUrl(fileMetadata, apiLocation, originals);
×
1652
                sb.append(formatFileListEntryHtml(fileMetadata, accessUrl));
×
1653
                sb.append("\n");
×
1654

1655
            } else if (localFolder.startsWith(folderName)){
×
1656
                String subFolder = "".equals(folderName) ? localFolder : localFolder.substring(folderName.length() + 1);
×
1657
                if (subFolder.indexOf('/') > 0) {
×
1658
                    subFolder = subFolder.substring(0, subFolder.indexOf('/'));
×
1659
                }
1660
                String folderAccessUrl = getFolderAccessUrl(fileMetadata.getDatasetVersion(), folderName, subFolder, apiLocation, originals);
×
1661
                sb.append(formatFileListFolderHtml(subFolder, folderAccessUrl));
×
1662
                sb.append("\n");
×
1663
            }
1664
        }
×
1665
        
1666
        return formatTable(sb.toString());
×
1667
    }
1668
        
1669
    private static String formatFolderListingTableHeaderHtml() {
1670
        
1671
        StringBuilder sb = new StringBuilder();
×
1672
        sb.append(HtmlFormatUtil.formatTag("Name", HTML_TABLE_HDR));
×
1673
        sb.append(HtmlFormatUtil.formatTag("Last Modified", HTML_TABLE_HDR));
×
1674
        sb.append(HtmlFormatUtil.formatTag("Size", HTML_TABLE_HDR));
×
1675
        sb.append(HtmlFormatUtil.formatTag("Description", HTML_TABLE_HDR));
×
1676
        
1677
        String hdr = formatTableRow(sb.toString());
×
1678
        
1679
        // add a separator row (again, we want it to look just like Apache index)
1680
        return hdr.concat(formatTableRow(HtmlFormatUtil.formatTag("<hr>", HTML_TABLE_HDR,"colspan=\"4\""))); 
×
1681
        
1682
    }
1683
    
1684
    private static String formatFileListEntryHtml(FileMetadata fileMetadata, String accessUrl) {
1685
        StringBuilder sb = new StringBuilder(); 
×
1686
        
1687
        String fileName = fileMetadata.getLabel();
×
1688
        String dateString =  new SimpleDateFormat(FILE_LIST_DATE_FORMAT).format(fileMetadata.getDataFile().getCreateDate()); 
×
1689
        String sizeString = fileMetadata.getDataFile().getFriendlySize();
×
1690
        
1691
        sb.append(formatTableCell(formatLink(fileName, accessUrl))); 
×
1692
        sb.append(formatTableCellAlignRight(dateString));
×
1693
        sb.append(formatTableCellAlignRight(sizeString));
×
1694
        sb.append(formatTableCellAlignRight("&nbsp;"));
×
1695
        
1696
        return formatTableRow(sb.toString());
×
1697
    }
1698
    
1699
    private static String formatFileListFolderHtml(String folderName, String listApiUrl) {
1700
        
1701
        StringBuilder sb = new StringBuilder();
×
1702
        
1703
        sb.append(formatTableCell(formatLink(folderName+"/", listApiUrl)));
×
1704
        sb.append(formatTableCellAlignRight(" - "));
×
1705
        sb.append(formatTableCellAlignRight(" - "));
×
1706
        sb.append(formatTableCellAlignRight("&nbsp;"));
×
1707
        
1708
        return formatTableRow(sb.toString());
×
1709
    }
1710
    
1711
    private static String getFileAccessUrl(FileMetadata fileMetadata, String apiLocation, boolean original) {
1712
        String fileId = fileMetadata.getDataFile().getId().toString();
×
1713
        
1714
        if (StringUtil.nonEmpty(fileMetadata.getDirectoryLabel())) {
×
1715
            fileId = fileMetadata.getDirectoryLabel().concat("/").concat(fileId);
×
1716
        }
1717
        
1718
        String formatArg = fileMetadata.getDataFile().isTabularData() && original ? "?format=original" : ""; 
×
1719
        
1720
        return apiLocation + "/api/access/datafile/" + fileId + formatArg; 
×
1721
    }
1722
    
1723
    private static String getFolderAccessUrl(DatasetVersion version, String currentFolder, String subFolder, String apiLocation, boolean originals) {
1724
        String datasetId = version.getDataset().getId().toString();
×
1725
        String versionTag = version.getFriendlyVersionNumber();
×
1726
        versionTag = versionTag.replace("DRAFT", DS_VERSION_DRAFT);
×
1727
        if (!"".equals(currentFolder)) {
×
1728
            subFolder = currentFolder + "/" + subFolder;
×
1729
        }
1730
        
1731
        return apiLocation + "/api/datasets/" + datasetId + 
×
1732
                "/dirindex/?version=" + versionTag + "&" +
1733
                "folder=" + subFolder + 
1734
                (originals ? "&original=true" : "");
×
1735
    }
1736

1737
    /**
1738
     * This method takes a JsonArray of JsonObjects and extracts the fields of those
1739
     * objects corresponding to the supplied list of headers to create a rectangular
1740
     * CSV file (with headers) that lists the values from each object in rows. It
1741
     * was developed for use with the metrics API but could be useful in other calls
1742
     * that require the same transformation.
1743
     * 
1744
     * @param jsonArray
1745
     *            - the array of JsonObjects containing key/value pairs with keys
1746
     *            matching the headers. Keys that don't match a header are
1747
     *            ignored/not included.
1748
     * @param headers
1749
     *            - strings to use as CSV headers
1750
     * @return - the CSV file as a string, rows separated by '\n'
1751
     */
1752
    public static String jsonArrayOfObjectsToCSV(JsonArray jsonArray, String... headers) {
1753
        StringBuilder csvSB = new StringBuilder(String.join(",", headers));
×
1754
        jsonArray.forEach((jv) -> {
×
1755
            JsonObject jo = (JsonObject) jv;
×
1756
            String[] values = new String[headers.length];
×
1757
            for (int i = 0; i < headers.length; i++) {
×
1758
                if(jo.containsKey(headers[i])) {
×
1759
                    values[i] = jo.get(headers[i]).toString();
×
1760
                }
1761
            }
1762
            csvSB.append("\n").append(String.join(",", values));
×
1763
        });
×
1764
        return csvSB.toString();
×
1765
    }
1766

1767
    public static boolean isActivelyEmbargoed(DataFile df) {
1768
        Embargo e = df.getEmbargo();
1✔
1769
        if (e != null) {
1✔
1770
            LocalDate endDate = e.getDateAvailable();
1✔
1771
            if (endDate != null && endDate.isAfter(LocalDate.now())) {
1✔
1772
                return true;
1✔
1773
            }
1774
        }
1775
        return false;
1✔
1776
    }
1777

1778
    public static boolean isActivelyEmbargoed(FileMetadata fileMetadata) {
1779
        return isActivelyEmbargoed(fileMetadata.getDataFile());
1✔
1780
    }
1781
    
1782
    public static boolean isActivelyEmbargoed(List<FileMetadata> fmdList) {
1783
        for (FileMetadata fmd : fmdList) {
×
1784
            if (isActivelyEmbargoed(fmd)) {
×
1785
                return true;
×
1786
            }
1787
        }
×
1788
        return false;
×
1789
    }
1790

1791
    public static boolean isRetentionExpired(DataFile df) {
1792
        Retention e = df.getRetention();
1✔
1793
        if (e != null) {
1✔
1794
            LocalDate endDate = e.getDateUnavailable();
1✔
1795
            if (endDate != null && endDate.isBefore(LocalDate.now())) {
1✔
1796
                return true;
1✔
1797
            }
1798
        }
1799
        return false;
1✔
1800
    }
1801

1802
    public static boolean isRetentionExpired(FileMetadata fileMetadata) {
1803
        return isRetentionExpired(fileMetadata.getDataFile());
1✔
1804
    }
1805

1806
    public static boolean isRetentionExpired(List<FileMetadata> fmdList) {
NEW
1807
        for (FileMetadata fmd : fmdList) {
×
NEW
1808
            if (isRetentionExpired(fmd)) {
×
NEW
1809
                return true;
×
1810
            }
NEW
1811
        }
×
NEW
1812
        return false;
×
1813
    }
1814

1815
    public static String getStorageDriver(DataFile dataFile) {
1816
        String storageIdentifier = dataFile.getStorageIdentifier();
×
1817
        return storageIdentifier.substring(0, storageIdentifier.indexOf(DataAccess.SEPARATOR));
×
1818
    }
1819
    
1820
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc