• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #24060

16 Dec 2024 01:30PM CUT coverage: 22.573% (+0.004%) from 22.569%
#24060

push

github

stevenwinship
Merge branch 'develop' into 10714-access-requests-missing-since-upgrade-v6-0

2 of 18 new or added lines in 5 files covered. (11.11%)

3 existing lines in 1 file now uncovered.

19544 of 86582 relevant lines covered (22.57%)

0.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

12.48
/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
1
package edu.harvard.iq.dataverse;
2

3
import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
4
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
5
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
6
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
7
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
8
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
9
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
10
import edu.harvard.iq.dataverse.search.SolrSearchResult;
11
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
12
import edu.harvard.iq.dataverse.storageuse.StorageQuota;
13
import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean;
14
import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit;
15
import edu.harvard.iq.dataverse.util.FileSortFieldAndOrder;
16
import edu.harvard.iq.dataverse.util.FileUtil;
17
import edu.harvard.iq.dataverse.util.SystemConfig;
18
import java.io.IOException;
19
import java.sql.Timestamp;
20
import java.util.ArrayList;
21
import java.util.Collections;
22
import java.util.Date;
23
import java.util.HashMap;
24
import java.util.HashSet;
25
import java.util.Iterator;
26
import java.util.LinkedList;
27
import java.util.List;
28
import java.util.Map;
29
import java.util.Set;
30
import java.util.UUID;
31
import java.util.logging.Level;
32
import java.util.logging.Logger;
33
import jakarta.ejb.EJB;
34
import jakarta.ejb.Stateless;
35
import jakarta.ejb.TransactionAttribute;
36
import jakarta.ejb.TransactionAttributeType;
37
import jakarta.inject.Named;
38
import jakarta.persistence.EntityManager;
39
import jakarta.persistence.NoResultException;
40
import jakarta.persistence.PersistenceContext;
41
import jakarta.persistence.Query;
42
import jakarta.persistence.TypedQuery;
43

44
/**
45
 *
46
 * @author Leonid Andreev
47
 * 
48
 */
49

50
@Stateless
51
@Named
52
public class DataFileServiceBean implements java.io.Serializable {
1✔
53
    
54
    private static final Logger logger = Logger.getLogger(DataFileServiceBean.class.getCanonicalName());
1✔
55
    @EJB
56
    DvObjectServiceBean dvObjectService;
57
    @EJB
58
    PermissionServiceBean permissionService;
59
    @EJB
60
    UserServiceBean userService; 
61
    @EJB
62
    SettingsServiceBean settingsService;
63
    
64
    @EJB 
65
    IngestServiceBean ingestService;
66

67
    @EJB EmbargoServiceBean embargoService;
68
    
69
    @EJB SystemConfig systemConfig;
70
    
71
    @EJB
72
    StorageUseServiceBean storageUseService; 
73
    
74
    @PersistenceContext(unitName = "VDCNet-ejbPU")
75
    private EntityManager em;
76
    
77
    // Assorted useful mime types:
78
    
79
    // 3rd-party and/or proprietary tabular data formats that we know
80
    // how to ingest:
81
    
82
    private static final String MIME_TYPE_STATA = "application/x-stata";
83
    private static final String MIME_TYPE_STATA13 = "application/x-stata-13";
84
    private static final String MIME_TYPE_RDATA = "application/x-rlang-transport";
85
    private static final String MIME_TYPE_CSV   = "text/csv";
86
    private static final String MIME_TYPE_CSV_ALT = "text/comma-separated-values";
87
    private static final String MIME_TYPE_TSV   = "text/tsv";
88
    public static final String MIME_TYPE_TSV_ALT   = "text/tab-separated-values";
89
    private static final String MIME_TYPE_XLSX  = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
90
    private static final String MIME_TYPE_SPSS_SAV = "application/x-spss-sav";
91
    private static final String MIME_TYPE_SPSS_POR = "application/x-spss-por";
92
    
93
    // Tabular data formats we don't know how to ingest, but still recognize
94
    // as "tabular data":
95
    // TODO: - add more to this list? -- L.A. 4.0 beta13
96
    
97
    private static final String MIME_TYPE_FIXED_FIELD = "text/x-fixed-field";
98
    private static final String MIME_TYPE_SAS_TRANSPORT = "application/x-sas-transport";
99
    private static final String MIME_TYPE_SAS_SYSTEM = "application/x-sas-system";
100
    
101
    // The following are the "control card/syntax" formats that we recognize 
102
    // as "code":
103
    
104
    private static final String MIME_TYPE_R_SYNTAX = "application/x-r-syntax";
105
    private static final String MIME_TYPE_STATA_SYNTAX = "text/x-stata-syntax";
106
    private static final String MIME_TYPE_SPSS_CCARD = "text/x-spss-syntax";
107
    private static final String MIME_TYPE_SAS_SYNTAX = "text/x-sas-syntax";
108

109
    // The types recognized as "documents":
110
    // TODO: there has to be more! -- L.A. 4.0 beta13
111
    
112
    private static final String MIME_TYPE_PLAIN_TEXT = "text/plain";
113
    private static final String MIME_TYPE_DOCUMENT_PDF = "application/pdf";
114
    private static final String MIME_TYPE_DOCUMENT_MSWORD = "application/msword";
115
    private static final String MIME_TYPE_DOCUMENT_MSEXCEL = "application/vnd.ms-excel";
116
    private static final String MIME_TYPE_DOCUMENT_MSWORD_OPENXML = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
117
    
118
    // Supported Astrophysics formats: 
119
    // (only FITS at this point)
120
    
121
    private static final String MIME_TYPE_FITS  = "application/fits";
122

123
    // Network Data files: 
124
    // (only GRAPHML at this point): 
125
    
126
    private static final String MIME_TYPE_NETWORK_GRAPHML = "text/xml-graphml";
127
   
128
    // SHAPE file type: 
129
    // this is the only supported file type in the GEO DATA class:
130
        
131
    private static final String MIME_TYPE_ZIP   = "application/zip";
132
    
133
    private static final String MIME_TYPE_UNDETERMINED_DEFAULT = "application/octet-stream";
134
    private static final String MIME_TYPE_UNDETERMINED_BINARY = "application/binary";
135

136
    /**
137
     * Per https://en.wikipedia.org/wiki/Media_type#Vendor_tree just "dataverse"
138
     * should be fine.
139
     *
140
     * @todo Consider registering this at http://www.iana.org/form/media-types
141
     * or switch to "prs" which "includes media types created experimentally or
142
     * as part of products that are not distributed commercially" according to
143
     * the page URL above.
144
     */
145
    public static final String MIME_TYPE_PACKAGE_FILE = "application/vnd.dataverse.file-package";
146
    
147
    public DataFile find(Object pk) {
148
        return em.find(DataFile.class, pk);
×
149
    }   
150
    
151
    /*public DataFile findByMD5(String md5Value){
152
        if (md5Value == null){
153
            return null;
154
        }
155
        Query query = em.createQuery("select object(o) from DataFile as o where o.md5 =:md5Value order by o.id");
156
        query.setParameter("md5Value", md5Value);
157
        return (DataFile)query.getSingleResult();
158
        
159
    }*/
160
    
161
    public List<DataFile> findAll(List<Long> fileIds){
162
        List<DataFile> dataFiles = new ArrayList<>();
×
163

164
         for (Long fileId : fileIds){
×
165
             dataFiles.add(find(fileId));
×
166
         }
×
167

168
        return dataFiles;
×
169
    }
170

171
    public List<DataFile> findAll(String fileIdsAsString){
172
        ArrayList<Long> dataFileIds = new ArrayList<>();
×
173

174
        String[] fileIds = fileIdsAsString.split(",");
×
175
        for (String fId : fileIds){
×
176
            dataFileIds.add(Long.parseLong(fId));
×
177
        }
178

179
        return findAll(dataFileIds);
×
180
    }
181
    
182
    public DataFile findByGlobalId(String globalId) {
183
            return (DataFile) dvObjectService.findByGlobalId(globalId, DvObject.DType.DataFile);
×
184
    }
185

186
    public List<DataFile> findByCreatorId(Long creatorId) {
187
        return em.createNamedQuery("DataFile.findByCreatorId").setParameter("creatorId", creatorId).getResultList();
×
188
    }
189

190
    public List<DataFile> findByReleaseUserId(Long releaseUserId) {
191
        return em.createNamedQuery("DataFile.findByReleaseUserId").setParameter("releaseUserId", releaseUserId).getResultList();
×
192
    }
193

194
    public DataFile findReplacementFile(Long previousFileId){
195
        Query query = em.createQuery("select object(o) from DataFile as o where o.previousDataFileId = :previousFileId");
×
196
        query.setParameter("previousFileId", previousFileId);
×
197
        try {
198
            DataFile retVal = (DataFile)query.getSingleResult();
×
199
            return retVal;
×
200
        } catch(Exception ex) {
×
201
            return null;
×
202
        }
203
    }
204

205
    
206
    public DataFile findPreviousFile(DataFile df){
207
        TypedQuery<DataFile> query = em.createQuery("select o from DataFile o" + " WHERE o.id = :dataFileId", DataFile.class);
×
208
        query.setParameter("dataFileId", df.getPreviousDataFileId());
×
209
        try {
210
            DataFile retVal = query.getSingleResult();
×
211
            return retVal;
×
212
        } catch(Exception ex) {
×
213
            return null;
×
214
        }
215
    }
216
    
217
    public List<DataFile> findByDatasetId(Long studyId) {
218
        /* 
219
           Sure, we don't have *studies* any more, in 4.0; it's a tribute 
220
           to the past. -- L.A.
221
        */
222
        String qr = "select o from DataFile o where o.owner.id = :studyId order by o.id";
×
223
        return em.createQuery(qr, DataFile.class)
×
224
                .setParameter("studyId", studyId).getResultList();
×
225
    }
226
    
227
    /**
228
     * 
229
     * @param collectionId numeric id of the parent collection ("dataverse")
230
     * @return list of files in the datasets that are *direct* children of the collection specified
231
     * (i.e., no datafiles in sub-collections of this collection will be included)
232
     */
233
    public List<DataFile> findByDirectCollectionOwner(Long collectionId) {
234
        String queryString = "select f from DataFile f, Dataset d where f.owner.id = d.id and d.owner.id = :collectionId order by f.id";
×
235
        return em.createQuery(queryString, DataFile.class)
×
236
                .setParameter("collectionId", collectionId).getResultList();
×
237
    }
238
    
239
    public List<DataFile> findAllRelatedByRootDatafileId(Long datafileId) {
240
        /* 
241
         Get all files with the same root datafile id
242
         the first file has its own id as root so only one query needed.
243
        */
244
        String qr = "select o from DataFile o where o.rootDataFileId = :datafileId order by o.id";
×
245
        return em.createQuery(qr, DataFile.class)
×
246
                .setParameter("datafileId", datafileId).getResultList();
×
247
    }
248

249
    public DataFile findByStorageIdandDatasetVersion(String storageId, DatasetVersion dv) {
250
        try {
251
            Query query = em.createNativeQuery("select o.id from dvobject o, filemetadata m " +
×
252
                    "where o.storageidentifier = '" + storageId + "' and o.id = m.datafile_id and m.datasetversion_id = " +
253
                    dv.getId() + "");
×
254
            query.setMaxResults(1);
×
255
            if (query.getResultList().size() < 1) {
×
256
                return null;
×
257
            } else {
258
                return findCheapAndEasy((Long) query.getSingleResult());
×
259
                //Pretty sure the above return will always error due to a conversion error
260
                //I "reverted" my change because I ended up not using this, but here is the fix below --MAD
261
//                Integer qr = (Integer) query.getSingleResult();
262
//                return findCheapAndEasy(qr.longValue());
263
            }
264
        } catch (Exception e) {
×
265
            logger.log(Level.SEVERE, "Error finding datafile by storageID and DataSetVersion: " + e.getMessage());
×
266
            return null;
×
267
        }
268
    }
269
    
270
    public List<FileMetadata> findFileMetadataByDatasetVersionId(Long datasetVersionId, int maxResults, String userSuppliedSortField, String userSuppliedSortOrder) {
271
        FileSortFieldAndOrder sortFieldAndOrder = new FileSortFieldAndOrder(userSuppliedSortField, userSuppliedSortOrder);
×
272
        String sortField = sortFieldAndOrder.getSortField();
×
273
        String sortOrder = sortFieldAndOrder.getSortOrder();
×
274
        if (maxResults < 0) {
×
275
            // return all results if user asks for negative number of results
276
            maxResults = 0;
×
277
        }
278
        String qr = "select o from FileMetadata o where o.datasetVersion.id = :datasetVersionId order by o." + sortField + " " + sortOrder;
×
279
        return em.createQuery(qr, FileMetadata.class)
×
280
                    .setParameter("datasetVersionId", datasetVersionId)
×
281
                    .setMaxResults(maxResults)
×
282
                    .getResultList();
×
283
    }
284
    
285
    public List<FileMetadata> findFileMetadataByDatasetVersionIdLabelSearchTerm(Long datasetVersionId, String searchTerm, String userSuppliedSortField, String userSuppliedSortOrder){
286
        FileSortFieldAndOrder sortFieldAndOrder = new FileSortFieldAndOrder(userSuppliedSortField, userSuppliedSortOrder);
×
287

288
        String sortField = sortFieldAndOrder.getSortField();
×
289
        String sortOrder = sortFieldAndOrder.getSortOrder();
×
290
        String searchClause = "";
×
291
        if(searchTerm != null && !searchTerm.isEmpty()){
×
292
            searchClause = " and  (lower(o.label) like '%" + searchTerm.toLowerCase() + "%' or lower(o.description) like '%" + searchTerm.toLowerCase() + "%')";
×
293
        }
294
        
295
        String queryString = "select o from FileMetadata o where o.datasetVersion.id = :datasetVersionId"
×
296
                + searchClause
297
                + " order by o." + sortField + " " + sortOrder;
298
        return em.createQuery(queryString, FileMetadata.class) 
×
299
            .setParameter("datasetVersionId", datasetVersionId)
×
300
            .getResultList();
×
301
    }
302
    
303
    public List<Integer> findFileMetadataIdsByDatasetVersionIdLabelSearchTerm(Long datasetVersionId, String searchTerm, String userSuppliedSortField, String userSuppliedSortOrder){
304
        FileSortFieldAndOrder sortFieldAndOrder = new FileSortFieldAndOrder(userSuppliedSortField, userSuppliedSortOrder);
×
305
        
306
        searchTerm=searchTerm.trim();
×
307
        String sortField = sortFieldAndOrder.getSortField();
×
308
        String sortOrder = sortFieldAndOrder.getSortOrder();
×
309
        String searchClause = "";
×
310
        if(searchTerm != null && !searchTerm.isEmpty()){
×
311
            searchClause = " and  (lower(o.label) like '%" + searchTerm.toLowerCase() + "%' or lower(o.description) like '%" + searchTerm.toLowerCase() + "%')";
×
312
        }
313
        
314
        //the createNativeQuary takes persistant entities, which Integer.class is not,
315
        //which is causing the exception. Hence, this query does not need an Integer.class
316
        //as the second parameter. 
317
        return em.createNativeQuery("select o.id from FileMetadata o where o.datasetVersion_id = "  + datasetVersionId
×
318
                + searchClause
319
                + " order by o." + sortField + " " + sortOrder)
320
                .getResultList();
×
321
    }
322
    
323
    public List<Long> findDataFileIdsByDatasetVersionIdLabelSearchTerm(Long datasetVersionId, String searchTerm, String userSuppliedSortField, String userSuppliedSortOrder){
324
        FileSortFieldAndOrder sortFieldAndOrder = new FileSortFieldAndOrder(userSuppliedSortField, userSuppliedSortOrder);
×
325
        
326
        searchTerm=searchTerm.trim();
×
327
        String sortField = sortFieldAndOrder.getSortField();
×
328
        String sortOrder = sortFieldAndOrder.getSortOrder();
×
329
        String searchClause = "";
×
330
        if(searchTerm != null && !searchTerm.isEmpty()){
×
331
            searchClause = " and  (lower(o.label) like '%" + searchTerm.toLowerCase() + "%' or lower(o.description) like '%" + searchTerm.toLowerCase() + "%')";
×
332
        }
333
        
334
        return em.createNativeQuery("select o.datafile_id from FileMetadata o where o.datasetVersion_id = "  + datasetVersionId
×
335
                + searchClause
336
                + " order by o." + sortField + " " + sortOrder)
337
                .getResultList();
×
338
    }
339
    
340
    public List<FileMetadata> findFileMetadataByDatasetVersionIdLazy(Long datasetVersionId, int maxResults, String userSuppliedSortField, String userSuppliedSortOrder, int firstResult) {
341
        FileSortFieldAndOrder sortFieldAndOrder = new FileSortFieldAndOrder(userSuppliedSortField, userSuppliedSortOrder);
×
342
        String sortField = sortFieldAndOrder.getSortField();
×
343
        String sortOrder = sortFieldAndOrder.getSortOrder();
×
344

345
        if (maxResults < 0) {
×
346
            // return all results if user asks for negative number of results
347
            maxResults = 0;
×
348
        }
349
        return em.createQuery("select o from FileMetadata o where o.datasetVersion.id = :datasetVersionId order by o." + sortField + " " + sortOrder, FileMetadata.class)
×
350
                .setParameter("datasetVersionId", datasetVersionId)
×
351
                .setMaxResults(maxResults)
×
352
                .setFirstResult(firstResult)
×
353
                .getResultList();
×
354
    }
355
    
356
    public Long findCountByDatasetVersionId(Long datasetVersionId){
357
        return (Long) em.createNativeQuery("select count(*)  from FileMetadata fmd "
×
358
                + " where fmd.datasetVersion_id = " + datasetVersionId
359
                + ";").getSingleResult();
×
360
    }
361

362
    public FileMetadata findFileMetadata(Long fileMetadataId) {
363
        return em.find(FileMetadata.class, fileMetadataId);
×
364
    }
365
    
366
    public FileMetadata findFileMetadataByDatasetVersionIdAndDataFileId(Long datasetVersionId, Long dataFileId) {
367

368
        Query query = em.createQuery("select o from FileMetadata o where o.datasetVersion.id = :datasetVersionId  and o.dataFile.id = :dataFileId");
×
369
        query.setParameter("datasetVersionId", datasetVersionId);
×
370
        query.setParameter("dataFileId", dataFileId);
×
371
        try {
372
            FileMetadata retVal = (FileMetadata) query.getSingleResult();
×
373
            return retVal;
×
374
        } catch(Exception ex) {
×
375
            return null;
×
376
        }
377
    }
378

379
    public FileMetadata findMostRecentVersionFileIsIn(DataFile file) {
380
        if (file == null) {
1✔
381
            return null;
1✔
382
        }
383
        List<FileMetadata> fileMetadatas = file.getFileMetadatas();
×
384
        if (fileMetadatas == null || fileMetadatas.isEmpty()) {
×
385
            return null;
×
386
        } else {
387
            // This assumes the order of filemetadatas is from first to most recent, which is true as of v6.3 
388
            return fileMetadatas.get(fileMetadatas.size() - 1);
×
389
        }
390
    }
391
    
392
    public List<DataFile> findAllCheapAndEasy(String fileIdsAsString){ 
393
        //assumption is that the fileIds are separated by ','
394
        ArrayList <DataFile> dataFilesFound = new ArrayList<>();
×
395
        String[] fileIds = fileIdsAsString.split(",");
×
396
        DataFile df = this.findCheapAndEasy(Long.parseLong(fileIds[0]));
×
397
        if(df != null){
×
398
            dataFilesFound.add(df);
×
399
        }
400

401
        return dataFilesFound;
×
402
    }
403

404
    /**
     * Builds a {@link DataFile} by hand from one native SQL row instead of
     * loading the entity through the entity manager.
     * <p>
     * The row joins the file's DVOBJECT and DATAFILE records with the DVOBJECT
     * and DATASET records of its owner. The new DataFile gets a new
     * {@link Dataset} owner carrying only id, authority, identifier and
     * protocol. Creator and release user are selected (columns 7 and 8) but
     * not set on the object. When the content type is one of the two TSV mime
     * types, two more native queries fill in the {@link DataTable} and the
     * file's tags.
     * <p>
     * NOTE(review): the casts below ({@code Integer}, {@code Long},
     * {@code Timestamp}, {@code Boolean}) assume specific column types as
     * returned by the JDBC driver — confirm against the schema. A mismatch in
     * the main row mapping would surface as a ClassCastException, since only
     * the queries themselves are wrapped in try/catch.
     *
     * @param id id of the file (concatenated into the SQL text)
     * @return the populated DataFile, or {@code null} if the main query throws
     * or yields a null result
     */
    public DataFile findCheapAndEasy(Long id) {
        DataFile dataFile;

        Object[] result;

        // Column order of this SELECT defines the result[] indexes used below.
        try {
            result = (Object[]) em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t0.PREVIEWIMAGEAVAILABLE, t1.CONTENTTYPE, t0.STORAGEIDENTIFIER, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t3.ID, t2.AUTHORITY, t2.IDENTIFIER, t1.CHECKSUMTYPE, t1.PREVIOUSDATAFILEID, t1.ROOTDATAFILEID, t0.AUTHORITY, T0.PROTOCOL, T0.IDENTIFIER, t2.PROTOCOL FROM DVOBJECT t0, DATAFILE t1, DVOBJECT t2, DATASET t3 WHERE ((t0.ID = " + id + ") AND (t0.OWNER_ID = t2.ID) AND (t2.ID = t3.ID) AND (t1.ID = t0.ID))").getSingleResult();
        } catch (Exception ex) {
            // Any query failure is reported to the caller as "not found".
            return null;
        }

        if (result == null) {
            return null;
        }

        // result[0]: t0.ID
        Integer file_id = (Integer) result[0];

        dataFile = new DataFile();
        // NOTE(review): presumably flags this hand-built object as one that
        // must not be merged back into the persistence context — confirm.
        dataFile.setMergeable(false);

        dataFile.setId(file_id.longValue());

        // result[1..6]: the six timestamp columns of the file's DVOBJECT row.
        Timestamp createDate = (Timestamp) result[1];
        Timestamp indexTime = (Timestamp) result[2];
        Timestamp modificationTime = (Timestamp) result[3];
        Timestamp permissionIndexTime = (Timestamp) result[4];
        Timestamp permissionModificationTime = (Timestamp) result[5];
        Timestamp publicationDate = (Timestamp) result[6];

        dataFile.setCreateDate(createDate);
        dataFile.setIndexTime(indexTime);
        dataFile.setModificationTime(modificationTime);
        dataFile.setPermissionIndexTime(permissionIndexTime);
        dataFile.setPermissionModificationTime(permissionModificationTime);
        dataFile.setPublicationDate(publicationDate);

        // no support for users yet!
        // (no need to - so far? -- L.A. 4.2.2) 
        /*
         Long creatorId = (Long) result[7];
         if (creatorId != null) {
         AuthenticatedUser creator = userMap.get(creatorId);
         if (creator == null) {
         creator = userService.find(creatorId);
         if (creator != null) {
         userMap.put(creatorId, creator);
         }
         }
         if (creator != null) {
         dataFile.setCreator(creator);
         }
         }

         Long releaseUserId = (Long) result[8];
         if (releaseUserId != null) {
         AuthenticatedUser releaseUser = userMap.get(releaseUserId);
         if (releaseUser == null) {
         releaseUser = userService.find(releaseUserId);
         if (releaseUser != null) {
         userMap.put(releaseUserId, releaseUser);
         }
         }
         if (releaseUser != null) {
         dataFile.setReleaseUser(releaseUser);
         }
         }
         */
        // result[9]: t0.PREVIEWIMAGEAVAILABLE
        Boolean previewAvailable = (Boolean) result[9];
        if (previewAvailable != null) {
            dataFile.setPreviewImageAvailable(previewAvailable);
        }
        
        // result[10]: t1.CONTENTTYPE (also decides the tabular lookups below)
        String contentType = (String) result[10];
        
        if (contentType != null) {
            dataFile.setContentType(contentType);
        }

        // result[11]: t0.STORAGEIDENTIFIER
        String storageIdentifier = (String) result[11];

        if (storageIdentifier != null) {
            dataFile.setStorageIdentifier(storageIdentifier);
        }

        // result[12]: t1.FILESIZE
        Long fileSize = (Long) result[12];

        if (fileSize != null) {
            dataFile.setFilesize(fileSize);
        }

        // result[13]: t1.INGESTSTATUS; only its first character is used.
        if (result[13] != null) {
            String ingestStatusString = (String) result[13];
            dataFile.setIngestStatus(ingestStatusString.charAt(0));
        }

        // result[14]: t1.CHECKSUMVALUE (the local is named md5, but the value
        // is stored as the generic checksum value)
        String md5 = (String) result[14];

        if (md5 != null) {
            dataFile.setChecksumValue(md5);
        }

        // result[15]: t1.RESTRICTED
        Boolean restricted = (Boolean) result[15];
        if (restricted != null) {
            dataFile.setRestricted(restricted);
        }


        // Minimal owner: only the fields selected from the owner's rows are set.
        Dataset owner = new Dataset();

        
        // TODO: check for nulls
        // result[16]: t3.ID, result[17]: t2.AUTHORITY, result[18]: t2.IDENTIFIER
        owner.setId((Long)result[16]);
        owner.setAuthority((String)result[17]);
        owner.setIdentifier((String)result[18]);

        // result[19]: t1.CHECKSUMTYPE
        String checksumType = (String) result[19];
        if (checksumType != null) {
            try {
                // In the database we store "SHA1" rather than "SHA-1".
                DataFile.ChecksumType typeFromStringInDatabase = DataFile.ChecksumType.valueOf(checksumType);
                dataFile.setChecksumType(typeFromStringInDatabase);
            } catch (IllegalArgumentException ex) {
                // Unknown enum name: log it and leave the checksum type unset.
                logger.info("Exception trying to convert " + checksumType + " to enum: " + ex);
            }
        }
        
        // result[20]: t1.PREVIOUSDATAFILEID
        Long previousDataFileId = (Long) result[20];
        if (previousDataFileId != null){
            dataFile.setPreviousDataFileId(previousDataFileId);
        }
        
        // result[21]: t1.ROOTDATAFILEID
        Long rootDataFileId = (Long) result[21];
        if (rootDataFileId != null){
            dataFile.setRootDataFileId(rootDataFileId);
        } 
        
        // result[22..24]: authority, protocol and identifier of the file itself (t0)
        String authority = (String) result[22];
        if (authority != null) {
            dataFile.setAuthority(authority);
        }

        String protocol = (String) result[23];
        if (protocol != null) {
            dataFile.setProtocol(protocol);
        }

        String identifier = (String) result[24];
        if (identifier != null) {
            dataFile.setIdentifier(identifier);
        }
        
        // result[25]: t2.PROTOCOL (the owner's protocol)
        owner.setProtocol((String) result[25]);
        
        dataFile.setOwner(owner);

        // If content type indicates it's tabular data, spend 2 extra queries 
        // looking up the data table and tabular tags objects:
        
        if (MIME_TYPE_TSV.equalsIgnoreCase(contentType) || MIME_TYPE_TSV_ALT.equalsIgnoreCase(contentType)) {
            Object[] dtResult;
            try {
                dtResult = (Object[]) em.createNativeQuery("SELECT ID, UNF, CASEQUANTITY, VARQUANTITY, ORIGINALFILEFORMAT, ORIGINALFILESIZE FROM dataTable WHERE DATAFILE_ID = " + id).getSingleResult();
            } catch (Exception ex) {
                // A failed data table lookup is not fatal: the file is still
                // returned, just without a data table or tags.
                dtResult = null;
            }
        
            if (dtResult != null) {
                DataTable dataTable = new DataTable(); 

                // dtResult indexes follow the SELECT above:
                // 0 ID, 1 UNF, 2 CASEQUANTITY, 3 VARQUANTITY,
                // 4 ORIGINALFILEFORMAT, 5 ORIGINALFILESIZE
                dataTable.setId(((Integer) dtResult[0]).longValue());
            
                dataTable.setUnf((String)dtResult[1]);
            
                dataTable.setCaseQuantity((Long)dtResult[2]);
            
                dataTable.setVarQuantity((Long)dtResult[3]);
            
                dataTable.setOriginalFileFormat((String)dtResult[4]);
                
                dataTable.setOriginalFileSize((Long)dtResult[5]);
                
                // Link the data table and the file in both directions.
                dataTable.setDataFile(dataFile);
                dataFile.setDataTable(dataTable);
                
                // tabular tags: 
                
                List<Object[]> tagResults;
                try {
                    tagResults = em.createNativeQuery("SELECT t.TYPE, t.DATAFILE_ID FROM DATAFILETAG t WHERE t.DATAFILE_ID = " + id).getResultList();
                } catch (Exception ex) {
                    logger.info("EXCEPTION looking up tags.");
                    tagResults = null;
                }
                
                if (tagResults != null) {
                    List<String> fileTagLabels = DataFileTag.listTags();
                    
                    for (Object[] tagResult : tagResults) {
                        // tagResult[0] is the TYPE column (not a tag row id);
                        // it is used as an index into the label list.
                        Integer tagId = (Integer)tagResult[0];
                        DataFileTag tag = new DataFileTag();
                        tag.setTypeByLabel(fileTagLabels.get(tagId));
                        tag.setDataFile(dataFile);
                        dataFile.addTag(tag);
                    }
                }
            }
        }
        
        return dataFile;
    }
614
    
615
    private List<AuthenticatedUser> retrieveFileAccessRequesters(DataFile fileIn) {
616
        List<AuthenticatedUser> retList = new ArrayList<>();
×
617

618
        // List<Object> requesters = em.createNativeQuery("select authenticated_user_id
619
        // from fileaccessrequests where datafile_id =
620
        // "+fileIn.getId()).getResultList();
621
        TypedQuery<Long> typedQuery = em.createQuery("select f.user.id from FileAccessRequest f where f.dataFile.id = :file_id and f.requestState= :requestState", Long.class);
×
622
        typedQuery.setParameter("file_id", fileIn.getId());
×
623
        typedQuery.setParameter("requestState", FileAccessRequest.RequestState.CREATED);
×
624
        List<Long> requesters = typedQuery.getResultList();
×
625
        for (Long userId : requesters) {
×
626
            AuthenticatedUser user = userService.find(userId);
×
627
            if (user != null) {
×
628
                retList.add(user);
×
629
            }
630
        }
×
631

632
        return retList;
×
633
    }
634
    
635
    private List<FileMetadata> retrieveFileMetadataForVersion(Dataset dataset, DatasetVersion version, List<DataFile> dataFiles, Map<Long, Integer> filesMap, Map<Long, Integer> categoryMap) {
636
        List<FileMetadata> retList = new ArrayList<>();
×
637
        Map<Long, Set<Long>> categoryMetaMap = new HashMap<>();
×
638
        
639
        List<Object[]> categoryResults = em.createNativeQuery("select t0.filecategories_id, t0.filemetadatas_id from filemetadata_datafilecategory t0, filemetadata t1 where (t0.filemetadatas_id = t1.id) AND (t1.datasetversion_id = "+version.getId()+")").getResultList();
×
640
        int i = 0;
×
641
        for (Object[] result : categoryResults) {
×
642
            Long category_id = (Long) result[0];
×
643
            Long filemeta_id = (Long) result[1];
×
644
            if (categoryMetaMap.get(filemeta_id) == null) {
×
645
                categoryMetaMap.put(filemeta_id, new HashSet<>());
×
646
            }
647
            categoryMetaMap.get(filemeta_id).add(category_id);
×
648
            i++;
×
649
        }
×
650
        logger.fine("Retrieved and mapped "+i+" file categories attached to files in the version "+version.getId());
×
651
        
652
        List<Object[]> metadataResults = em.createNativeQuery("select id, datafile_id, DESCRIPTION, LABEL, RESTRICTED, DIRECTORYLABEL, prov_freeform from FileMetadata where datasetversion_id = "+version.getId() + " ORDER BY LABEL").getResultList();
×
653
        
654
        for (Object[] result : metadataResults) {
×
655
            Integer filemeta_id = (Integer) result[0];
×
656
            
657
            if (filemeta_id == null) {
×
658
                continue;
×
659
            }
660
            
661
            Long file_id = (Long) result[1];
×
662
            if (file_id == null) {
×
663
                continue;
×
664
            }
665
            
666
            Integer file_list_id = filesMap.get(file_id);
×
667
            if (file_list_id == null) {
×
668
                continue;
×
669
            }
670
            FileMetadata fileMetadata = new FileMetadata();
×
671
            fileMetadata.setId(filemeta_id.longValue());
×
672
            fileMetadata.setCategories(new LinkedList<>());
×
673

674
            if (categoryMetaMap.get(fileMetadata.getId()) != null) {
×
675
                for (Long cat_id : categoryMetaMap.get(fileMetadata.getId())) {
×
676
                    if (categoryMap.get(cat_id) != null) {
×
677
                        fileMetadata.getCategories().add(dataset.getCategories().get(categoryMap.get(cat_id)));
×
678
                    }
679
                }
×
680
            }
681

682
            fileMetadata.setDatasetVersion(version);
×
683
            
684
            // Link the FileMetadata object to the DataFile:
685
            fileMetadata.setDataFile(dataFiles.get(file_list_id));
×
686
            // ... and the DataFile back to the FileMetadata:
687
            fileMetadata.getDataFile().getFileMetadatas().add(fileMetadata);
×
688
            
689
            String description = (String) result[2]; 
×
690
            
691
            if (description != null) {
×
692
                fileMetadata.setDescription(description);
×
693
            }
694
            
695
            String label = (String) result[3];
×
696
            
697
            if (label != null) {
×
698
                fileMetadata.setLabel(label);
×
699
            }
700
                        
701
            Boolean restricted = (Boolean) result[4];
×
702
            if (restricted != null) {
×
703
                fileMetadata.setRestricted(restricted);
×
704
            }
705
            
706
            String dirLabel = (String) result[5];
×
707
            if (dirLabel != null){
×
708
                fileMetadata.setDirectoryLabel(dirLabel);
×
709
            }
710
            
711
            String provFreeForm = (String) result[6];
×
712
            if (provFreeForm != null){
×
713
                fileMetadata.setProvFreeForm(provFreeForm);
×
714
            }
715
                        
716
            retList.add(fileMetadata);
×
717
        }
×
718
        
719
        logger.fine("Retrieved "+retList.size()+" file metadatas for version "+version.getId()+" (inside the retrieveFileMetadataForVersion method).");
×
720
                
721
        
722
        /* 
723
            We no longer perform this sort here, just to keep this filemetadata
724
            list as identical as possible to when it's produced by the "traditional"
725
            EJB method. When it's necessary to have the filemetadatas sorted by 
726
            FileMetadata.compareByLabel, the DatasetVersion.getFileMetadatasSorted()
727
            method should be called. 
728
        
729
        Collections.sort(retList, FileMetadata.compareByLabel); */
730
        
731
        return retList; 
×
732
    }
733
    
734
    public List<DataFile> findIngestsInProgress() {
735
        if ( em.isOpen() ) {
×
736
            String qr = "select object(o) from DataFile as o where o.ingestStatus =:scheduledStatusCode or o.ingestStatus =:progressStatusCode order by o.id";
×
737
            return em.createQuery(qr, DataFile.class)
×
738
                .setParameter("scheduledStatusCode", DataFile.INGEST_STATUS_SCHEDULED)
×
739
                .setParameter("progressStatusCode", DataFile.INGEST_STATUS_INPROGRESS)
×
740
                .getResultList();
×
741
        } else {
742
            return Collections.emptyList();
×
743
        }
744
    }
745
    
746
    
747
    public DataTable findDataTableByFileId(Long fileId) {
748
        Query query = em.createQuery("select object(o) from DataTable as o where o.dataFile.id =:fileId order by o.id");
×
749
        query.setParameter("fileId", fileId);
×
750
        
751
        Object singleResult;
752
        
753
        try{
754
            return (DataTable)query.getSingleResult();
×
755
        }catch(NoResultException ex){
×
756
            return null;
×
757
        }
758
    }
759
    
760
    public List<DataFile> findAll() {
761
        return em.createQuery("select object(o) from DataFile as o order by o.id", DataFile.class).getResultList();
×
762
    }
763
    
764
    public List<VersionState> findVersionStates(Long fileId) {
765
        Query query = em.createQuery(
×
766
                "select distinct dv.versionState from DatasetVersion dv where dv.id in (select fm.datasetVersion.id from FileMetadata fm where fm.dataFile.id=:fileId)");
767
        query.setParameter("fileId", fileId);
×
768
        return query.getResultList();
×
769
    }
770
    
771
    public DataFile save(DataFile dataFile) {
772

773
        if (dataFile.isMergeable()) {   
×
774
            DataFile savedDataFile = em.merge(dataFile);
×
775
            return savedDataFile;
×
776
        } else {
777
            throw new IllegalArgumentException("This DataFile object has been set to NOT MERGEABLE; please ensure a MERGEABLE object is passed to the save method.");
×
778
        } 
779
    }
780
    
781
    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
782
    public DataFile saveInTransaction(DataFile dataFile) {
783

784
        if (dataFile.isMergeable()) {   
×
785
            DataFile savedDataFile = em.merge(dataFile);
×
786
            return savedDataFile;
×
787
        } else {
788
            throw new IllegalArgumentException("This DataFile object has been set to NOT MERGEABLE; please ensure a MERGEABLE object is passed to the save method.");
×
789
        } 
790
    }
791
    
792
    private void msg(String m){
793
        System.out.println(m);
×
794
    }
×
795
    private void dashes(){
796
        msg("----------------");
×
797
    }
×
798
    private void msgt(String m){
799
        dashes(); msg(m); dashes();
×
800
    }
×
801
    
802
    /*
803
        Make sure the file replace ids are set for a initial version 
804
        of a file
805
    
806
    */
807
    public DataFile setAndCheckFileReplaceAttributes(DataFile savedDataFile){
808
               
809
        // Is this the initial version of a file?
810
        
811
        if ((savedDataFile.getRootDataFileId() == null)||
×
812
                (savedDataFile.getRootDataFileId().equals(DataFile.ROOT_DATAFILE_ID_DEFAULT))){
×
813
            msg("yes, initial version");
×
814
 
815
           // YES!  Set the RootDataFileId to the Id
816
           savedDataFile.setRootDataFileId(savedDataFile.getId());
×
817
           
818
           // SAVE IT AGAIN!!!
819
           msg("yes, save again");
×
820
        
821
            return em.merge(savedDataFile);   
×
822
           
823
        }else{       
824
            // Looking Good Billy Ray! Feeling Good Louis!    
825
            msg("nope, looks ok");
×
826

827
            return savedDataFile;
×
828
        }
829
    }
830
    
831
    
832
    public Boolean isPreviouslyPublished(Long fileId){
833
        Query query = em.createQuery("select object(o) from FileMetadata as o where o.dataFile.id =:fileId");
×
834
        query.setParameter("fileId", fileId);
×
835
        List<?> retList = query.getResultList();
×
836
        return (retList.size() > 1);
×
837
    }
838
    
839
    public void deleteFromVersion( DatasetVersion d, DataFile f ) {
840
                em.createNamedQuery("DataFile.removeFromDatasetVersion")
×
841
                        .setParameter("versionId", d.getId()).setParameter("fileId", f.getId())
×
842
                                .executeUpdate();
×
843
    }
×
844

845
    /* 
846
     Convenience methods for merging and removing individual file metadatas, 
847
     without touching the rest of the DataFile object:
848
    */
849
    
850
    public FileMetadata mergeFileMetadata(FileMetadata fileMetadata) {
851
        
852
        FileMetadata newFileMetadata = em.merge(fileMetadata);
×
853
        em.flush();
×
854
        
855
        // Set the initial value of the rootDataFileId
856
        //    (does nothing if it's already set)
857
        //DataFile updatedDataFile = setAndCheckFileReplaceAttributes(newFileMetadata.getDataFile());
858
               
859
        return newFileMetadata;
×
860
    }
861
    
862
    public void removeFileMetadata(FileMetadata fileMetadata) {
863
        msgt("removeFileMetadata: fileMetadata");
×
864
        FileMetadata mergedFM = em.merge(fileMetadata);
×
865
        em.remove(mergedFM);
×
866
    }
×
867
    
868
    /* 
869
     * Same, for DataTables:
870
    */
871
    
872
    public DataTable saveDataTable(DataTable dataTable) {
873
        DataTable merged = em.merge(dataTable);
×
874
        em.flush();
×
875
        return merged;
×
876
    }
877
    
878
    public List<DataFile> findHarvestedFilesByClient(HarvestingClient harvestingClient) {
879
        String qr = "SELECT d FROM DataFile d, DvObject o, Dataset s WHERE o.id = d.id AND o.owner.id = s.id AND s.harvestedFrom.id = :harvestingClientId";
×
880
        return em.createQuery(qr, DataFile.class)
×
881
            .setParameter("harvestingClientId", harvestingClient.getId())
×
882
            .getResultList();
×
883
    }
884
    
885
    /*moving to the fileutil*/
886
    
887
    public void generateStorageIdentifier(DataFile dataFile) {
888
        dataFile.setStorageIdentifier(generateStorageIdentifier());
×
889
    }
×
890
    
891
    public String generateStorageIdentifier() {
892
        
893
        UUID uid = UUID.randomUUID();
×
894
                
895
        logger.log(Level.FINE, "UUID value: {0}", uid.toString());
×
896
        
897
        // last 6 bytes, of the random UUID, in hex: 
898
        
899
        String hexRandom = uid.toString().substring(24);
×
900
        
901
        logger.log(Level.FINE, "UUID (last 6 bytes, 12 hex digits): {0}", hexRandom);
×
902
        
903
        String hexTimestamp = Long.toHexString(new Date().getTime());
×
904
        
905
        logger.log(Level.FINE, "(not UUID) timestamp in hex: {0}", hexTimestamp);
×
906
            
907
        String storageIdentifier = hexTimestamp + "-" + hexRandom;
×
908
        
909
        logger.log(Level.FINE, "timestamp/UUID hybrid: {0}", storageIdentifier);
×
910
        return storageIdentifier; 
×
911
    }
912
    
913
    public boolean isSpssPorFile (DataFile file) {
914
        return (file != null) ? MIME_TYPE_SPSS_POR.equalsIgnoreCase(file.getContentType()) : false;
1✔
915
    }
916
    
917
    public boolean isSpssSavFile (DataFile file) {
918
        return (file != null) ? MIME_TYPE_SPSS_SAV.equalsIgnoreCase(file.getContentType()) : false;
1✔
919
    }
920
    
921
    /*
922
    public boolean isSpssPorFile (FileMetadata fileMetadata) {
923
        if (fileMetadata != null && fileMetadata.getDataFile() != null) {
924
            return isSpssPorFile(fileMetadata.getDataFile());
925
        }
926
        return false; 
927
    }
928
    */
929
    
930
    /*
931
     * This method will return true if the thumbnail is *actually available* and
932
     * ready to be downloaded. (it will try to generate a thumbnail for supported
933
     * file types, if not yet available)
934
     */
935
    public boolean isThumbnailAvailable (DataFile file) {
936
        if (file == null) {
×
937
            return false; 
×
938
        } 
939

940
        // If this file already has the "thumbnail generated" flag set,
941
        // we'll just trust that:
942
        if (file.isPreviewImageAvailable()) {
×
943
            logger.fine("returning true");
×
944
            return true;
×
945
        }
946
        
947
        // If thumbnails are not even supported for this class of files, 
948
        // there's nothing to talk about:      
949
        if (!FileUtil.isThumbnailSupported(file)) {
×
950
            return false;
×
951
        }
952
        
953
        /*
954
         Checking the permission here was resulting in extra queries; 
955
         it is now the responsibility of the client - such as the DatasetPage - 
956
         to make sure the permission check out, before calling this method.
957
         (or *after* calling this method? - checking permissions costs db 
958
         queries; checking if the thumbnail is available may cost cpu time, if 
959
         it has to be generated on the fly - so you have to figure out which 
960
         is more important... 
961
        
962
        */
963
        
964
        file = this.find(file.getId());
×
965
        if (ImageThumbConverter.isThumbnailAvailable(file)) {
×
966
            file.setPreviewImageAvailable(true);
×
967
            this.save(file);
×
968
            return true;
×
969
        }
970
        file.setPreviewImageFail(true);
×
971
        file.setPreviewImageAvailable(false);
×
972
        this.save(file);
×
973
        return false;
×
974
    }
975

976
    
977
    /* 
978
     * Methods for identifying "classes" (groupings) of files by type:
979
    */
980
    
981
    public String getFileClassById (Long fileId) {
982
        DataFile file = find(fileId);
×
983
        
984
        if (file == null) {
×
985
            return null; 
×
986
        }
987
        
988
        return getFileThumbnailClass(file);
×
989
    }
990
    
991
    public String getFileThumbnailClass (DataFile file) {
992
        // there's no solr search facet for "package files", but
993
        // there is a special thumbnail icon:
994
        if (isFileClassPackage(file)) {
1✔
995
            return FileUtil.FILE_THUMBNAIL_CLASS_PACKAGE;
×
996
        }
997
        
998
        if (file != null) {
1✔
999
            String fileTypeFacet = FileUtil.getFacetFileType(file);
1✔
1000
        
1001
            if (fileTypeFacet != null && FileUtil.FILE_THUMBNAIL_CLASSES.containsKey(fileTypeFacet)) {
1✔
1002
                return FileUtil.FILE_THUMBNAIL_CLASSES.get(fileTypeFacet);
1✔
1003
            }
1004
        }
1005
        
1006
        return FileUtil.FILE_THUMBNAIL_CLASS_OTHER;
1✔
1007
    }
1008
    
1009
    
1010
    
1011
    public boolean isFileClassImage (DataFile file) {
1012
        if (file == null) {
1✔
1013
            return false;
1✔
1014
        }
1015
        
1016
        String contentType = file.getContentType();
1✔
1017

1018
        // Some browsers (Chrome?) seem to identify FITS files as mime
1019
        // type "image/fits" on upload; this is both incorrect (the official
1020
        // mime type for FITS is "application/fits", and problematic: then
1021
        // the file is identified as an image, and the page will attempt to 
1022
        // generate a preview - which of course is going to fail...
1023
        
1024
        if (FileUtil.MIME_TYPE_FITSIMAGE.equalsIgnoreCase(contentType)) {
1✔
1025
            return false;
×
1026
        }
1027
        // besides most image/* types, we can generate thumbnails for 
1028
        // pdf and "world map" files:
1029
        
1030
        return (contentType != null && (contentType.toLowerCase().startsWith("image/")));
1✔
1031
    }
1032
    
1033
    public boolean isFileClassAudio (DataFile file) {
1034
        if (file == null) {
1✔
1035
            return false;
1✔
1036
        }
1037
        
1038
        String contentType = file.getContentType();
1✔
1039
        
1040
        // TODO: 
1041
        // verify that there are no audio types that don't start with "audio/" - 
1042
        //  some exotic mp[34]... ?
1043
        
1044
        return (contentType != null && (contentType.toLowerCase().startsWith("audio/")));    
1✔
1045
    }
1046
    
1047
    public boolean isFileClassCode (DataFile file) {
1048
        if (file == null) {
1✔
1049
            return false;
1✔
1050
        }
1051
     
1052
        String contentType = file.getContentType();
1✔
1053
        
1054
        // The following are the "control card/syntax" formats that we recognize 
1055
        // as "code":
1056
    
1057
        return (MIME_TYPE_R_SYNTAX.equalsIgnoreCase(contentType)
1✔
1058
            || MIME_TYPE_STATA_SYNTAX.equalsIgnoreCase(contentType) 
1✔
1059
            || MIME_TYPE_SAS_SYNTAX.equalsIgnoreCase(contentType)
1✔
1060
            || MIME_TYPE_SPSS_CCARD.equalsIgnoreCase(contentType));
1✔
1061
        
1062
    }
1063
    
1064
    public boolean isFileClassDocument (DataFile file) {
1065
        if (file == null) {
1✔
1066
            return false;
1✔
1067
        }
1068
        
1069
        // "Documents": PDF, assorted MS docs, etc. 
1070
        
1071
        String contentType = file.getContentType();
1✔
1072
        int scIndex = 0;
1✔
1073
        if (contentType != null && (scIndex = contentType.indexOf(';')) > 0) {
1✔
1074
            contentType = contentType.substring(0, scIndex);
×
1075
        }
1076
        
1077
        return (MIME_TYPE_PLAIN_TEXT.equalsIgnoreCase(contentType)
1✔
1078
            || MIME_TYPE_DOCUMENT_PDF.equalsIgnoreCase(contentType)
1✔
1079
            || MIME_TYPE_DOCUMENT_MSWORD.equalsIgnoreCase(contentType)
1✔
1080
            || MIME_TYPE_DOCUMENT_MSEXCEL.equalsIgnoreCase(contentType)
1✔
1081
            || MIME_TYPE_DOCUMENT_MSWORD_OPENXML.equalsIgnoreCase(contentType));
1✔
1082
        
1083
    }
1084
    
1085
    public boolean isFileClassAstro (DataFile file) {
1086
        if (file == null) {
1✔
1087
            return false;
1✔
1088
        }
1089
        
1090
        String contentType = file.getContentType();
1✔
1091
       
1092
        // The only known/supported "Astro" file type is FITS,
1093
        // so far:
1094
        
1095
        return (MIME_TYPE_FITS.equalsIgnoreCase(contentType) || FileUtil.MIME_TYPE_FITSIMAGE.equalsIgnoreCase(contentType));
1✔
1096
        
1097
    }
1098
    
1099
    public boolean isFileClassNetwork (DataFile file) {
1100
        if (file == null) {
1✔
1101
            return false;
1✔
1102
        }
1103
        
1104
        String contentType = file.getContentType();
1✔
1105
       
1106
        // The only known/supported Network Data type is GRAPHML,
1107
        // so far:
1108
        
1109
        return MIME_TYPE_NETWORK_GRAPHML.equalsIgnoreCase(contentType);
1✔
1110
        
1111
    }
1112
    
1113
    /* 
1114
     * we don't really need a method for "other" - 
1115
     * it's "other" if it fails to identify as any specific class... 
1116
     * (or do we?)
1117
    public boolean isFileClassOther (DataFile file) {
1118
        if (file == null) {
1119
            return false;
1120
        }
1121
        
1122
    }
1123
    */
1124
    
1125
    public boolean isFileClassGeo (DataFile file) {
1126
        if (file == null) {
1✔
1127
            return false;
1✔
1128
        }
1129
        
1130
        String contentType = file.getContentType();
1✔
1131
       
1132
        // The only known/supported Geo Data type is SHAPE,
1133
        // so far:
1134
        
1135
        return FileUtil.MIME_TYPE_GEO_SHAPE.equalsIgnoreCase(contentType);
1✔
1136
    }
1137
    
1138
    public boolean isFileClassTabularData (DataFile file) {
1139
        if (file == null) {
1✔
1140
            return false;
1✔
1141
        }
1142
        
1143
        // "Tabular data" is EITHER an INGESTED tabular data file, i.e.
1144
        // a file with a DataTable and DataVariables; or a DataFile 
1145
        // of one of the many known tabular data formats - SPSS, Stata, etc.
1146
        // that for one reason or another didn't get ingested: 
1147
        
1148
        if (file.isTabularData()) {
1✔
1149
            return true; 
×
1150
        }
1151
        
1152
        // The formats we know how to ingest: 
1153
        if (FileUtil.canIngestAsTabular(file)) {
1✔
1154
            return true;
×
1155
        }
1156
        
1157
        String contentType = file.getContentType();
1✔
1158
        
1159
        // And these are the formats we DON'T know how to ingest, 
1160
        // but nevertheless recognize as "tabular data":
1161
        
1162
        return (MIME_TYPE_TSV.equalsIgnoreCase(contentType)
1✔
1163
            || MIME_TYPE_FIXED_FIELD.equalsIgnoreCase(contentType) 
1✔
1164
            || MIME_TYPE_SAS_TRANSPORT.equalsIgnoreCase(contentType)
1✔
1165
            || MIME_TYPE_SAS_SYSTEM.equalsIgnoreCase(contentType));
1✔
1166
        
1167
    }
1168
    
1169
    public boolean isFileClassVideo (DataFile file) {
1170
        if (file == null) {
1✔
1171
            return false;
1✔
1172
        }
1173
        
1174
        String contentType = file.getContentType();
1✔
1175
        
1176
        // TODO: 
1177
        // check if there are video types that don't start with "audio/" - 
1178
        // some exotic application/... formats ?
1179
        
1180
        return (contentType != null && (contentType.toLowerCase().startsWith("video/")));    
1✔
1181
        
1182
    }
1183
    
1184
    public boolean isFileClassPackage (DataFile file) {
1185
        if (file == null) {
1✔
1186
            return false;
×
1187
        }
1188
        
1189
        String contentType = file.getContentType();
1✔
1190
       
1191
        return MIME_TYPE_PACKAGE_FILE.equalsIgnoreCase(contentType);
1✔
1192
    }
1193
    
1194
    public void populateFileSearchCard(SolrSearchResult solrSearchResult) {
1195
        solrSearchResult.setEntity(this.findCheapAndEasy(solrSearchResult.getEntityId()));
×
1196
    }
×
1197
    
1198
    public boolean hasBeenDeleted(DataFile df){
1199
        Dataset dataset = df.getOwner();
×
1200
        DatasetVersion dsv = dataset.getLatestVersion();
×
1201
        
1202
        return findFileMetadataByDatasetVersionIdAndDataFileId(dsv.getId(), df.getId()) == null;
×
1203
        
1204
    }
1205
    
1206
    /**
1207
     * Is this a replacement file??
1208
     * 
1209
     * The indication of a previousDataFileId says that it is
1210
     * 
1211
     * @param df
1212
     * @return
1213
     */
1214
    public boolean isReplacementFile(DataFile df) {
1215

1216
        if (df.getPreviousDataFileId() == null){
×
1217
            return false;
×
1218
        }else if (df.getPreviousDataFileId() < 1){
×
1219
            String errMSg = "Stop! previousDataFileId should either be null or a number greater than 0";
×
1220
            logger.severe(errMSg);
×
1221
            return false;
×
1222
            // blow up -- this shouldn't happen!
1223
            //throw new FileReplaceException(errMSg);
1224
        }else if (df.getPreviousDataFileId() > 0){
×
1225
            return true;
×
1226
        }
1227
        return false;
×
1228
    }  // end: isReplacementFile
1229
    
1230
    public List<Long> selectFilesWithMissingOriginalTypes() {
1231
        Query query = em.createNativeQuery("SELECT f.id FROM datafile f, datatable t where t.datafile_id = f.id AND (t.originalfileformat='" + MIME_TYPE_TSV + "' OR t.originalfileformat IS NULL) ORDER BY f.id");
×
1232
        
1233
        try {
1234
            return query.getResultList();
×
1235
        } catch (Exception ex) {
×
1236
            return new ArrayList<>();
×
1237
        }
1238
    }
1239
    
1240
    public List<Long> selectFilesWithMissingOriginalSizes() {
1241
        Query query = em.createNativeQuery("SELECT f.id FROM datafile f, datatable t where t.datafile_id = f.id AND (t.originalfilesize IS NULL ) AND (t.originalfileformat IS NOT NULL) ORDER BY f.id");
×
1242
        
1243
        try {
1244
            return query.getResultList();
×
1245
        } catch (Exception ex) {
×
1246
            return new ArrayList<>();
×
1247
        }
1248
    }
1249
    
1250

1251
    public void finalizeFileDelete(Long dataFileId, String storageLocation) throws IOException {
1252
        // Verify that the DataFile no longer exists: 
1253
        if (find(dataFileId) != null) {
×
1254
            throw new IOException("Attempted to permanently delete a physical file still associated with an existing DvObject "
×
1255
                    + "(id: " + dataFileId + ", location: " + storageLocation);
1256
        }
1257
        if(storageLocation == null || storageLocation.isBlank()) {
×
1258
            throw new IOException("Attempted to delete a physical file with no location "
×
1259
                    + "(id: " + dataFileId + ", location: " + storageLocation);
1260
        }
1261
        StorageIO<DvObject> directStorageAccess = DataAccess.getDirectStorageIO(storageLocation);
×
1262
        directStorageAccess.delete();
×
1263
    }
×
1264
    
1265
    public void finalizeFileDeletes(Map<Long, String> storageLocations) {
1266
        storageLocations.keySet().stream().forEach((dataFileId) -> {
×
1267
            String storageLocation = storageLocations.get(dataFileId);
×
1268

1269
            try {
1270
                finalizeFileDelete(dataFileId, storageLocation);
×
1271
            } catch (IOException ioex) {
×
1272
                logger.warning("Failed to delete the physical file associated with the deleted datafile id="
×
1273
                        + dataFileId + ", storage location: " + storageLocation);
1274
            }
×
1275
        });
×
1276
    }
×
1277
    
1278
    public Map<Long, String> getPhysicalFilesToDelete(DatasetVersion datasetVersion) {
1279
        return getPhysicalFilesToDelete(datasetVersion, false);
×
1280
    }
1281
    
1282
    public Map<Long, String> getPhysicalFilesToDelete(DatasetVersion datasetVersion, boolean destroy) {
1283
        // Gather the locations of the physical files associated with DRAFT
1284
        // (unpublished) DataFiles (or ALL the DataFiles, if "destroy") in the 
1285
        // DatasetVersion, that will need to be deleted once the 
1286
        // DeleteDatasetVersionCommand execution has been finalized:
1287

1288
        return getPhysicalFilesToDelete(datasetVersion.getFileMetadatas(), destroy);
×
1289
    }
1290
    
1291
    public Map<Long, String> getPhysicalFilesToDelete(List<FileMetadata> fileMetadatasToDelete) {
1292
        return getPhysicalFilesToDelete(fileMetadatasToDelete, false);
×
1293
    }
1294
    
1295
    public Map<Long, String> getPhysicalFilesToDelete(List<FileMetadata> fileMetadatasToDelete, boolean destroy) {
1296
        Map<Long, String> deleteStorageLocations = new HashMap<>();
×
1297

1298
        Iterator<FileMetadata> dfIt = fileMetadatasToDelete.iterator();
×
1299
        while (dfIt.hasNext()) {
×
1300
            DataFile df = dfIt.next().getDataFile();
×
1301

1302
            if (destroy || !df.isReleased()) {
×
1303

1304
                String storageLocation = getPhysicalFileToDelete(df);
×
1305
                if (storageLocation != null) {
×
1306
                    deleteStorageLocations.put(df.getId(), storageLocation);
×
1307
                }
1308

1309
            }
1310
        }
×
1311

1312
        return deleteStorageLocations;
×
1313
    }
1314
  
1315
    public Map<Long, String> getPhysicalFilesToDelete(Dataset dataset) {
1316
        // Gather the locations of ALL the physical files associated with 
1317
        // a DATASET that is being DESTROYED, that will need to be deleted
1318
        // once the DestroyDataset command execution has been finalized. 
1319
        // Once again, note that we are selecting all the files from the dataset
1320
        // - not just drafts. 
1321

1322
        Map<Long, String> deleteStorageLocations = new HashMap<>();
×
1323

1324
        Iterator<DataFile> dfIt = dataset.getFiles().iterator();
×
1325
        while (dfIt.hasNext()) {
×
1326
            DataFile df = dfIt.next();
×
1327

1328
            String storageLocation = getPhysicalFileToDelete(df);
×
1329
            if (storageLocation != null) {
×
1330
                deleteStorageLocations.put(df.getId(), storageLocation);
×
1331
            }
1332

1333
        }
×
1334

1335
        return deleteStorageLocations;
×
1336
    }
1337
    
1338
    public String getPhysicalFileToDelete(DataFile dataFile) {
1339
        try {
1340
            StorageIO<DataFile> storageIO = dataFile.getStorageIO();
×
1341
            return storageIO.getStorageLocation();
×
1342

1343
        } catch (IOException ioex) {
×
1344
            // something potentially wrong with the physical file,
1345
            // or connection to the physical storage? 
1346
            // we don't care (?) - we'll still try to delete the datafile from the database.
1347
        }
1348
        return null;
×
1349
    }
1350
    
1351
    public boolean isFoldersMetadataPresentInVersion(DatasetVersion datasetVersion) {
1352
        Query query = em.createNativeQuery("SELECT id FROM fileMetadata WHERE datasetversion_id="+datasetVersion.getId()+" AND directoryLabel IS NOT null LIMIT 1");
×
1353
        
1354
        try {
1355
            int count = query.getResultList().size();
×
1356
            return count > 0;
×
1357
        } catch (Exception ex) {
×
1358
            return false;
×
1359
        }
1360
    }
1361
    
1362
    public boolean isActivelyEmbargoed(FileMetadata fm) {
1363
        return FileUtil.isActivelyEmbargoed(fm);
×
1364
    }
1365

1366
    public Embargo findEmbargo(Long id) {
1367
        DataFile d = find(id);
×
1368
        return d.getEmbargo();
×
1369
    }
1370

1371
    public boolean isRetentionExpired(FileMetadata fm) {
1372
        return FileUtil.isRetentionExpired(fm);
×
1373
    }
1374
    /**
1375
     * Checks if the supplied DvObjectContainer (Dataset or Collection; although
1376
     * only collection-level storage quotas are officially supported as of now)
1377
     * has a quota configured, and if not, keeps checking if any of the direct
1378
     * ancestor Collections further up have a configured quota. If it finds one, 
1379
     * it will retrieve the current total content size for that specific ancestor 
1380
     * dvObjectContainer and use it to define the quota limit for the upload
1381
     * session in progress. 
1382
     * 
1383
     * @param parent - DvObjectContainer, Dataset or Collection
1384
     * @return upload session size limit spec, or null if quota not defined on 
1385
     * any of the ancestor DvObjectContainers
1386
     */
1387
    public UploadSessionQuotaLimit getUploadSessionQuotaLimit(DvObjectContainer parent) {
1388
        DvObjectContainer testDvContainer = parent; 
×
1389
        StorageQuota quota = testDvContainer.getStorageQuota();
×
1390
        while (quota == null && testDvContainer.getOwner() != null) {
×
1391
            testDvContainer = testDvContainer.getOwner();
×
1392
            quota = testDvContainer.getStorageQuota();
×
1393
            if (quota != null) {
×
1394
                break;
×
1395
            }
1396
        }    
1397
        if (quota == null || quota.getAllocation() == null) {
×
1398
            return null; 
×
1399
        }
1400
        
1401
        // Note that we are checking the recorded storage use not on the 
1402
        // immediate parent necessarily, but on the specific ancestor 
1403
        // DvObjectContainer on which the storage quota is defined:
1404
        Long currentSize = storageUseService.findStorageSizeByDvContainerId(testDvContainer.getId()); 
×
1405
        
1406
        return new UploadSessionQuotaLimit(quota.getAllocation(), currentSize);
×
1407
    }
1408

1409
    public boolean isInReleasedVersion(Long id) {
NEW
1410
        Query query = em.createNativeQuery("SELECT fm.id FROM filemetadata fm WHERE fm.datasetversion_id=(SELECT dv.id FROM datasetversion dv, dvobject dvo WHERE dv.dataset_id=dvo.owner_id AND dv.versionState='RELEASED' and dvo.id=" + id + " ORDER BY dv.versionNumber DESC, dv.minorVersionNumber DESC LIMIT 1) AND fm.datafile_id=" + id);
×
1411
        
1412
        try {
1413
            query.getSingleResult();
×
1414
            return true;
×
1415
        } catch (Exception ex) {
×
1416
            return false;
×
1417
        }
1418
    }
1419
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc