IQSS / dataverse / build #22985

23 Aug 2024 06:32PM UTC coverage: 20.61% (-0.2%) from 20.791%

Pull Request #10781: Improved handling of Globus uploads
Latest commit (github): landreev - added an upfront locks check to the /addGlobusFiles api #10623

4 of 417 new or added lines in 15 files covered (0.96%).
4194 existing lines in 35 files now uncovered.
17388 of 84365 relevant lines covered (20.61%).
0.21 hits per line.

Source File (0.0% of lines covered)
/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
package edu.harvard.iq.dataverse.util.bagit;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.TreeSet;
import java.util.Map.Entry;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;

import edu.harvard.iq.dataverse.util.BundleUtil;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.compress.archivers.zip.ParallelScatterZipCreator;
import org.apache.commons.compress.archivers.zip.ScatterZipOutputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.compress.parallel.InputStreamSupplier;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.text.WordUtils;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.util.EntityUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.json.JSONArray;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonPrimitive;
import com.google.gson.JsonSyntaxException;

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DataFile.ChecksumType;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.util.json.JsonLDTerm;
import java.util.Optional;

public class BagGenerator {

    private static final Logger logger = Logger.getLogger(BagGenerator.class.getCanonicalName());

    private ParallelScatterZipCreator scatterZipCreator = null;
    private ScatterZipOutputStream dirs = null;

    private JsonArray aggregates = null;
    private ArrayList<String> resourceIndex = null;
    private Boolean[] resourceUsed = null;
    private HashMap<String, String> pidMap = new LinkedHashMap<String, String>();
    private HashMap<String, String> checksumMap = new LinkedHashMap<String, String>();

    private int timeout = 60;
    private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000)
            .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000)
            .setCookieSpec(CookieSpecs.STANDARD).build();
    protected CloseableHttpClient client;
    private PoolingHttpClientConnectionManager cm = null;

    private ChecksumType hashtype = null;
    private boolean ignorehashes = false;

    private long dataCount = 0l;
    private long totalDataSize = 0l;
    private long maxFileSize = 0l;
    private Set<String> mimetypes = new TreeSet<String>();

    private String bagID = null;
    private String bagPath = "/tmp";
    String bagName = null;

    private String apiKey = null;

    private jakarta.json.JsonObject oremapObject;
    private JsonObject aggregation;

    private String dataciteXml;

    private boolean usetemp = false;

    private int numConnections = 8;
    public static final String BAG_GENERATOR_THREADS = ":BagGeneratorThreads";

    private OREMap oremap;

    static PrintWriter pw = null;

    /**
     * This BagGenerator creates a BagIt version 1.0
     * (https://tools.ietf.org/html/draft-kunze-bagit-16) compliant bag that is also
     * minimally compatible with the Research Data Repository Interoperability WG
     * Final Recommendations (DOI: 10.15497/RDA00025). It works by parsing the
     * submitted OAI-ORE Map file, using the metadata therein to create required
     * BagIt metadata, and using the schema.org/sameAs entries for
     * AggregatedResources as a way to retrieve these files and store them in the
     * /data directory within the BagIt structure. The Bag is zipped. File retrieval
     * and zipping are done in parallel, using a connection pool. The required space
     * on disk is ~ n+1/n of the final bag size, e.g. 125% of the bag size for a
     * 4-way parallel zip operation.
     * @throws Exception
     * @throws JsonSyntaxException
     */

    public BagGenerator(OREMap oreMap, String dataciteXml) throws JsonSyntaxException, Exception {
        this.oremap = oreMap;
        this.oremapObject = oreMap.getOREMap();
                //(JsonObject) new JsonParser().parse(oreMap.getOREMap().toString());
        this.dataciteXml = dataciteXml;

        try {
            // Using Dataverse, all the URLs to be retrieved should be on the current server, so allowing self-signed certs and not verifying hostnames are useful in testing and
            // shouldn't be a significant security issue. This should not be allowed for arbitrary OREMap sources.
            SSLContextBuilder builder = new SSLContextBuilder();
            try {
                builder.loadTrustMaterial(null, new TrustSelfSignedStrategy());
            } catch (KeyStoreException e) {
                e.printStackTrace();
            }

            SSLConnectionSocketFactory sslConnectionFactory = new SSLConnectionSocketFactory(builder.build(), NoopHostnameVerifier.INSTANCE);

            Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
                    .register("http", PlainConnectionSocketFactory.getSocketFactory())
                    .register("https", sslConnectionFactory).build();
            cm = new PoolingHttpClientConnectionManager(registry);

            cm.setDefaultMaxPerRoute(numConnections);
            cm.setMaxTotal(numConnections > 20 ? numConnections : 20);

            client = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build();

            scatterZipCreator = new ParallelScatterZipCreator(Executors.newFixedThreadPool(numConnections));
        } catch (NoSuchAlgorithmException | KeyManagementException e) {
            logger.warning("Aint gonna work");
            e.printStackTrace();
        }
    }

    public void setIgnoreHashes(boolean val) {
        ignorehashes = val;
    }

    public void setDefaultCheckSumType(ChecksumType type) {
        hashtype = type;
    }

    public static void println(String s) {
        System.out.println(s);
        System.out.flush();
        if (pw != null) {
            pw.println(s);
            pw.flush();
        }
        return;
    }

    /*
     * Full workflow to generate new BagIt bag from ORE Map Url and to write the bag
     * to the provided output stream (Ex: File OS, FTP OS etc.).
     *
     * @return success true/false
     */
    public boolean generateBag(OutputStream outputStream) throws Exception {

        File tmp = File.createTempFile("qdr-scatter-dirs", "tmp");
        dirs = ScatterZipOutputStream.fileBased(tmp);
        // The oremapObject is javax.json.JsonObject and we need com.google.gson.JsonObject for the aggregation object
        aggregation = (JsonObject) new JsonParser().parse(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString());

        String pidUrlString = aggregation.get("@id").getAsString();
        String pidString = PidUtil.parseAsGlobalID(pidUrlString).asString();
        bagID = pidString + "v."
                + aggregation.get(JsonLDTerm.schemaOrg("version").getLabel()).getAsString();

        logger.info("Generating Bag: " + bagID);
        try {
            // Create valid filename from identifier and extend path with
            // two levels of hash-based subdirs to help distribute files
            bagName = getValidName(bagID);
        } catch (Exception e) {
            logger.severe("Couldn't create valid filename: " + e.getLocalizedMessage());
            return false;
        }
        // Create data dir in bag, also creates parent bagName dir
        String currentPath = "data/";
        createDir(currentPath);

        aggregates = aggregation.getAsJsonArray(JsonLDTerm.ore("aggregates").getLabel());

        if (aggregates != null) {
            // Add container and data entries
            // Setup global index of the aggregation and all aggregated
            // resources by Identifier
            resourceIndex = indexResources(aggregation.get("@id").getAsString(), aggregates);
            // Setup global list of succeed(true), fail(false), notused
            // (null) flags
            resourceUsed = new Boolean[aggregates.size() + 1];
            // Process current container (the aggregation itself) and its
            // children
            processContainer(aggregation, currentPath);
        }
        // Create manifest files
        // pid-mapping.txt - a DataOne recommendation to connect ids and
        // in-bag path/names
        StringBuffer pidStringBuffer = new StringBuffer();
        boolean first = true;
        for (Entry<String, String> pidEntry : pidMap.entrySet()) {
            if (!first) {
                pidStringBuffer.append("\r\n");
            } else {
                first = false;
            }
            String path = pidEntry.getValue();
            pidStringBuffer.append(pidEntry.getKey() + " " + path);
        }
        createDir("metadata/");
        createFileFromString("metadata/pid-mapping.txt", pidStringBuffer.toString());
        // Hash manifest - a hash manifest is required
        // by Bagit spec
        StringBuffer sha1StringBuffer = new StringBuffer();
        first = true;
        for (Entry<String, String> sha1Entry : checksumMap.entrySet()) {
            if (!first) {
                sha1StringBuffer.append("\r\n");
            } else {
                first = false;
            }
            String path = sha1Entry.getKey();
            sha1StringBuffer.append(sha1Entry.getValue() + " " + path);
        }
        if (!(hashtype == null)) {
            String manifestName = "manifest-";
            if (hashtype.equals(DataFile.ChecksumType.SHA1)) {
                manifestName = manifestName + "sha1.txt";
            } else if (hashtype.equals(DataFile.ChecksumType.SHA256)) {
                manifestName = manifestName + "sha256.txt";
            } else if (hashtype.equals(DataFile.ChecksumType.SHA512)) {
                manifestName = manifestName + "sha512.txt";
            } else if (hashtype.equals(DataFile.ChecksumType.MD5)) {
                manifestName = manifestName + "md5.txt";
            } else {
                logger.warning("Unsupported Hash type: " + hashtype);
            }
            createFileFromString(manifestName, sha1StringBuffer.toString());
        } else {
            logger.warning("No Hash values (no files?) sending empty manifest to nominally comply with BagIT specification requirement");
            createFileFromString("manifest-md5.txt", "");
        }
        // bagit.txt - Required by spec
        createFileFromString("bagit.txt", "BagIt-Version: 1.0\r\nTag-File-Character-Encoding: UTF-8");

        aggregation.addProperty(JsonLDTerm.totalSize.getLabel(), totalDataSize);
        aggregation.addProperty(JsonLDTerm.fileCount.getLabel(), dataCount);
        JsonArray mTypes = new JsonArray();
        for (String mt : mimetypes) {
            mTypes.add(new JsonPrimitive(mt));
        }
        aggregation.add(JsonLDTerm.dcTerms("format").getLabel(), mTypes);
        aggregation.addProperty(JsonLDTerm.maxFileSize.getLabel(), maxFileSize);
        // Serialize oremap itself
        // FixMe - add missing hash values if needed and update context
        // (read and cache files or read twice?)
        createFileFromString("metadata/oai-ore.jsonld", oremapObject.toString());

        createFileFromString("metadata/datacite.xml", dataciteXml);

        // Add a bag-info file
        createFileFromString("bag-info.txt", generateInfoFile());

        logger.fine("Creating bag: " + bagName);

        ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputStream);

        /*
         * Add all the waiting contents - dirs created first, then data files are
         * retrieved via URLs in parallel (defaults to one thread per processor)
         * directly to the zip file
         */
        logger.fine("Starting write");
        writeTo(zipArchiveOutputStream);
        logger.fine("Zipfile Written");
        // Finish
        zipArchiveOutputStream.close();
        logger.fine("Closed");

        // Validate oremap - all entries are part of the collection
        for (int i = 0; i < resourceUsed.length; i++) {
            Boolean b = resourceUsed[i];
            if (b == null) {
                logger.warning("Problem: " + pidMap.get(resourceIndex.get(i)) + " was not used");
            } else if (!b) {
                logger.warning("Problem: " + pidMap.get(resourceIndex.get(i)) + " was not included successfully");
            } else {
                // Successfully included - now check for hash value and
                // generate if needed
                if (i > 0) { // Not root container
                    if (!checksumMap.containsKey(pidMap.get(resourceIndex.get(i)))) {

                        if (!childIsContainer(aggregates.get(i - 1).getAsJsonObject()))
                            logger.warning("Missing checksum hash for: " + resourceIndex.get(i));
                        // FixMe - actually generate it before adding the
                        // oremap
                        // to the zip
                    }
                }
            }

        }

        logger.info("Created bag: " + bagName);
        client.close();
        return true;

    }

    public boolean generateBag(String bagName, boolean temp) {
        usetemp = temp;
        FileOutputStream bagFileOS = null;
        try {
            File origBagFile = getBagFile(bagName);
            File bagFile = origBagFile;
            if (usetemp) {
                bagFile = new File(bagFile.getAbsolutePath() + ".tmp");
                logger.fine("Writing to: " + bagFile.getAbsolutePath());
            }
            // Create an output stream backed by the file
            bagFileOS = new FileOutputStream(bagFile);
            if (generateBag(bagFileOS)) {
                //The generateBag call sets this.bagName to the correct value
                validateBagFile(bagFile);
                if (usetemp) {
                    logger.fine("Moving tmp zip");
                    origBagFile.delete();
                    bagFile.renameTo(origBagFile);
                }
                return true;
            } else {
                return false;
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "Bag Exception: ", e);
            e.printStackTrace();
            logger.warning("Failure: Processing failure during Bagit file creation");
            return false;
        } finally {
            IOUtils.closeQuietly(bagFileOS);
        }
    }

    public void validateBag(String bagId) {
        logger.info("Validating Bag");
        ZipFile zf = null;
        InputStream is = null;
        try {
            File bagFile = getBagFile(bagId);
            zf = new ZipFile(bagFile);
            ZipArchiveEntry entry = zf.getEntry(getValidName(bagId) + "/manifest-sha1.txt");
            if (entry != null) {
                logger.info("SHA1 hashes used");
                hashtype = DataFile.ChecksumType.SHA1;
            } else {
                entry = zf.getEntry(getValidName(bagId) + "/manifest-sha512.txt");
                if (entry != null) {
                    logger.info("SHA512 hashes used");
                    hashtype = DataFile.ChecksumType.SHA512;
                } else {
                    entry = zf.getEntry(getValidName(bagId) + "/manifest-sha256.txt");
                    if (entry != null) {
                        logger.info("SHA256 hashes used");
                        hashtype = DataFile.ChecksumType.SHA256;
                    } else {
                        entry = zf.getEntry(getValidName(bagId) + "/manifest-md5.txt");
                        if (entry != null) {
                            logger.info("MD5 hashes used");
                            hashtype = DataFile.ChecksumType.MD5;
                        }
                    }
                }
            }
            if (entry == null)
                throw new IOException("No manifest file found");
            is = zf.getInputStream(entry);
            BufferedReader br = new BufferedReader(new InputStreamReader(is));
            String line = br.readLine();
            while (line != null) {
                logger.fine("Hash entry: " + line);
                int breakIndex = line.indexOf(' ');
                String hash = line.substring(0, breakIndex);
                String path = line.substring(breakIndex + 1);
                logger.fine("Adding: " + path + " with hash: " + hash);
                checksumMap.put(path, hash);
                line = br.readLine();
            }
            IOUtils.closeQuietly(is);
            logger.info("HashMap Map contains: " + checksumMap.size() + " entries");
            checkFiles(checksumMap, bagFile);
        } catch (IOException io) {
            logger.log(Level.SEVERE, "Could not validate Hashes", io);
        } catch (Exception e) {
            logger.log(Level.SEVERE, "Could not validate Hashes", e);
        } finally {
            IOUtils.closeQuietly(zf);
        }
        return;
    }

    public File getBagFile(String bagID) throws Exception {

        String bagPath = Paths.get(getBagPath()).toString();
        // Create the bag file on disk
        File parent = new File(bagPath);
        if (!parent.exists()) {
            parent.mkdirs();
        }
        // Create known-good filename
        bagName = getValidName(bagID);
        File bagFile = new File(bagPath, bagName + ".zip");
        logger.fine("BagPath: " + bagFile.getAbsolutePath());
        // Create an output stream backed by the file
        return bagFile;
    }

    private void validateBagFile(File bagFile) throws IOException {
        // Run a confirmation test - should verify all files and hashes

        // Check files calculates the hashes and file sizes and reports on
        // whether hashes are correct
        checkFiles(checksumMap, bagFile);

        logger.info("Data Count: " + dataCount);
        logger.info("Data Size: " + totalDataSize);
    }

    public static String getValidName(String bagName) {
        // Create known-good filename - no spaces, no file-system separators.
        return bagName.replaceAll("\\W", "-");
    }

    private void processContainer(JsonObject item, String currentPath) throws IOException {
        JsonArray children = getChildren(item);
        HashSet<String> titles = new HashSet<String>();
        String title = null;
        if (item.has(JsonLDTerm.dcTerms("Title").getLabel())) {
            title = item.get("Title").getAsString();
        } else if (item.has(JsonLDTerm.schemaOrg("name").getLabel())) {
            title = item.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString();
        }
        logger.fine("Adding " + title + "/ to path " + currentPath);
        currentPath = currentPath + title + "/";
        int containerIndex = -1;
        try {
            createDir(currentPath);
            // Add containers to pid map and mark as 'used', but no sha1 hash
            // value
            containerIndex = getUnusedIndexOf(item.get("@id").getAsString());
            resourceUsed[containerIndex] = true;
            pidMap.put(item.get("@id").getAsString(), currentPath);

        } catch (InterruptedException | IOException | ExecutionException e) {
            e.printStackTrace();
            logger.severe(e.getMessage());
            if (containerIndex != -1) {
                resourceUsed[containerIndex] = false;
            }
            throw new IOException("Unable to create bag");

        }
        for (int i = 0; i < children.size(); i++) {

            // Find the ith child in the overall array of aggregated
            // resources
            String childId = children.get(i).getAsString();
            logger.fine("Processing: " + childId);
            int index = getUnusedIndexOf(childId);
            if (resourceUsed[index] != null) {
                System.out.println("Warning: reusing resource " + index);
            }

            // Aggregation is at index 0, so need to shift by 1 for aggregates
            // entries
            JsonObject child = aggregates.get(index - 1).getAsJsonObject();
            if (childIsContainer(child)) {
                // create dir and process children
                // processContainer will mark this item as used
                processContainer(child, currentPath);
            } else {
                resourceUsed[index] = true;
                // add item
                // ToDo
                String dataUrl = child.get(JsonLDTerm.schemaOrg("sameAs").getLabel()).getAsString();
                logger.fine("File url: " + dataUrl);
                String childTitle = child.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString();
                if (titles.contains(childTitle)) {
                    logger.warning("**** Multiple items with the same title in: " + currentPath);
                    logger.warning("**** Will cause failure in hash and size validation in: " + bagID);
                } else {
                    titles.add(childTitle);
                }
                String childPath = currentPath + childTitle;
                JsonElement directoryLabel = child.get(JsonLDTerm.DVCore("directoryLabel").getLabel());
                if (directoryLabel != null) {
                    childPath = currentPath + directoryLabel.getAsString() + "/" + childTitle;
                }

                String childHash = null;
                if (child.has(JsonLDTerm.checksum.getLabel())) {
                    ChecksumType childHashType = ChecksumType.fromString(
                            child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@type").getAsString());
                    if (hashtype == null) {
                        //If one wasn't set as a default, pick up what the first child with one uses
                        hashtype = childHashType;
                    }
                    if (hashtype != null && !hashtype.equals(childHashType)) {
                        logger.warning("Multiple hash values in use - will calculate " + hashtype.toString()
                            + " hashes for " + childTitle);
                    } else {
                        childHash = child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@value").getAsString();
                        if (checksumMap.containsValue(childHash)) {
                            // Something else has this hash
                            logger.warning("Duplicate/Collision: " + child.get("@id").getAsString() + " has SHA1 Hash: "
                                + childHash + " in: " + bagID);
                        }
                        logger.fine("Adding " + childPath + " with hash " + childHash + " to checksumMap");
                        checksumMap.put(childPath, childHash);
                    }
                }
                if ((hashtype == null) | ignorehashes) {
                    // Pick sha512 when ignoring hashes or none exist
                    hashtype = DataFile.ChecksumType.SHA512;
                }
                try {
                    if ((childHash == null) | ignorehashes) {
                        // Generate missing hash
                        InputStream inputStream = null;
                        try {
                            inputStream = getInputStreamSupplier(dataUrl).get();

                            if (hashtype != null) {
                                if (hashtype.equals(DataFile.ChecksumType.SHA1)) {
                                    childHash = DigestUtils.sha1Hex(inputStream);
                                } else if (hashtype.equals(DataFile.ChecksumType.SHA256)) {
                                    childHash = DigestUtils.sha256Hex(inputStream);
                                } else if (hashtype.equals(DataFile.ChecksumType.SHA512)) {
                                    childHash = DigestUtils.sha512Hex(inputStream);
                                } else if (hashtype.equals(DataFile.ChecksumType.MD5)) {
                                    childHash = DigestUtils.md5Hex(inputStream);
                                }
                            }

                        } catch (IOException e) {
                            logger.severe("Failed to read " + childPath);
                            throw e;
                        } finally {
                            IOUtils.closeQuietly(inputStream);
                        }
                        if (childHash != null) {
                            JsonObject childHashObject = new JsonObject();
                            childHashObject.addProperty("@type", hashtype.toString());
                            childHashObject.addProperty("@value", childHash);
                            child.add(JsonLDTerm.checksum.getLabel(), (JsonElement) childHashObject);

                            checksumMap.put(childPath, childHash);
                        } else {
                            logger.warning("Unable to calculate a " + hashtype + " for " + dataUrl);
                        }
                    }
                    logger.fine("Requesting: " + childPath + " from " + dataUrl);
                    createFileFromURL(childPath, dataUrl);
                    dataCount++;
                    if (dataCount % 1000 == 0) {
                        logger.info("Retrieval in progress: " + dataCount + " files retrieved");
                    }
                    if (child.has(JsonLDTerm.filesize.getLabel())) {
                        Long size = child.get(JsonLDTerm.filesize.getLabel()).getAsLong();
                        totalDataSize += size;
                        if (size > maxFileSize) {
                            maxFileSize = size;
                        }
                    }
                    if (child.has(JsonLDTerm.schemaOrg("fileFormat").getLabel())) {
                        mimetypes.add(child.get(JsonLDTerm.schemaOrg("fileFormat").getLabel()).getAsString());
                    }

                } catch (Exception e) {
                    resourceUsed[index] = false;
                    e.printStackTrace();
                    throw new IOException("Unable to create bag");
                }

                // Check for nulls!
                pidMap.put(child.get("@id").getAsString(), childPath);

            }
        }
    }

    private int getUnusedIndexOf(String childId) {
641
        int index = resourceIndex.indexOf(childId);
×
642
        if (resourceUsed[index] != null) {
×
643
            System.out.println("Warning: reusing resource " + index);
×
644
        }
645

646
        while (resourceUsed[index] != null) {
×
647
            int offset = index;
×
648
            index = offset + 1 + resourceIndex.subList(offset + 1, resourceIndex.size()).indexOf(childId);
×
649
        }
×
650
        System.out.println("Using index: " + index);
×
651
        if (index == -1) {
×
652
            logger.severe("Reused ID: " + childId + " not found enough times in resource list");
×
653
        }
654
        return index;
×
655
    }
656

657
    private ArrayList<String> indexResources(String aggId, JsonArray aggregates) {
658

659
        ArrayList<String> l = new ArrayList<String>(aggregates.size() + 1);
×
660
        l.add(aggId);
×
661
        for (int i = 0; i < aggregates.size(); i++) {
×
662
            logger.fine("Indexing : " + i + " " + aggregates.get(i).getAsJsonObject().get("@id").getAsString());
×
663
            l.add(aggregates.get(i).getAsJsonObject().get("@id").getAsString());
×
664
        }
665
        logger.fine("Index created for " + aggregates.size() + " entries");
×
666
        return l;
×
667
    }
668

669
    private void createDir(final String name) throws IOException, ExecutionException, InterruptedException {
670

671
        ZipArchiveEntry archiveEntry = new ZipArchiveEntry(bagName + "/" + name);
×
672
        archiveEntry.setMethod(ZipEntry.DEFLATED);
×
673
        InputStreamSupplier supp = new InputStreamSupplier() {
×
674
            public InputStream get() {
675
                return new ByteArrayInputStream(("").getBytes());
×
676
            }
677
        };
678

679
        addEntry(archiveEntry, supp);
×
680
    }
×
681

682
    private void createFileFromString(final String relPath, final String content)
683
            throws IOException, ExecutionException, InterruptedException {
684

685
        ZipArchiveEntry archiveEntry = new ZipArchiveEntry(bagName + "/" + relPath);
×
686
        archiveEntry.setMethod(ZipEntry.DEFLATED);
×
687
        InputStreamSupplier supp = new InputStreamSupplier() {
×
688
            public InputStream get() {
689
                try {
UNCOV
690
                    return new ByteArrayInputStream(content.getBytes("UTF-8"));
×
UNCOV
691
                } catch (UnsupportedEncodingException e) {
×
UNCOV
692
                    e.printStackTrace();
×
693
                }
694
                return null;
×
695
            }
696
        };
697

UNCOV
698
        addEntry(archiveEntry, supp);
×
699
    }
×
700

701
    private void createFileFromURL(final String relPath, final String uri)
702
            throws IOException, ExecutionException, InterruptedException {
703

UNCOV
704
        ZipArchiveEntry archiveEntry = new ZipArchiveEntry(bagName + "/" + relPath);
×
UNCOV
705
        archiveEntry.setMethod(ZipEntry.DEFLATED);
×
706
        InputStreamSupplier supp = getInputStreamSupplier(uri);
×
707
        addEntry(archiveEntry, supp);
×
UNCOV
708
    }
×
709

710
    private void checkFiles(HashMap<String, String> shaMap, File bagFile) {
711
        ExecutorService executor = Executors.newFixedThreadPool(numConnections);
×
712
        ZipFile zf = null;
×
713
        try {
714
            zf = new ZipFile(bagFile);
×
715

716
            BagValidationJob.setZipFile(zf);
×
717
            BagValidationJob.setBagGenerator(this);
×
718
            logger.fine("Validating hashes for zipped data files");
×
719
            int i = 0;
×
720
            for (Entry<String, String> entry : shaMap.entrySet()) {
×
UNCOV
721
                BagValidationJob vj = new BagValidationJob(bagName, entry.getValue(), entry.getKey());
×
722
                executor.execute(vj);
×
723
                i++;
×
UNCOV
724
                if (i % 1000 == 0) {
×
725
                    logger.info("Queuing Hash Validations: " + i);
×
726
                }
727
            }
×
728
            logger.fine("All Hash Validations Queued: " + i);
×
729

730
            executor.shutdown();
×
731
            try {
732
                while (!executor.awaitTermination(10, TimeUnit.MINUTES)) {
×
733
                    logger.fine("Awaiting completion of hash calculations.");
×
734
                }
735
            } catch (InterruptedException e) {
×
UNCOV
736
                logger.log(Level.SEVERE, "Hash Calculations interrupted", e);
×
737
            } 
×
UNCOV
738
        } catch (IOException e1) {
×
739
            // TODO Auto-generated catch block
UNCOV
740
            e1.printStackTrace();
×
741
        } finally {
UNCOV
742
            IOUtils.closeQuietly(zf);
×
743
        }
744
        logger.fine("Hash Validations Completed");
×
745

UNCOV
746
    }
×
747

748
    public void addEntry(ZipArchiveEntry zipArchiveEntry, InputStreamSupplier streamSupplier) throws IOException {
UNCOV
749
        if (zipArchiveEntry.isDirectory() && !zipArchiveEntry.isUnixSymlink())
×
UNCOV
750
            dirs.addArchiveEntry(ZipArchiveEntryRequest.createZipArchiveEntryRequest(zipArchiveEntry, streamSupplier));
×
751
        else
752
            scatterZipCreator.addArchiveEntry(zipArchiveEntry, streamSupplier);
×
753
    }
×
754

755
    public void writeTo(ZipArchiveOutputStream zipArchiveOutputStream)
756
            throws IOException, ExecutionException, InterruptedException {
757
        logger.fine("Writing dirs");
×
758
        dirs.writeTo(zipArchiveOutputStream);
×
UNCOV
759
        dirs.close();
×
UNCOV
760
        logger.fine("Dirs written");
×
UNCOV
761
        scatterZipCreator.writeTo(zipArchiveOutputStream);
×
UNCOV
762
        logger.fine("Files written");
×
763
    }
×
764

765
    static final String CRLF = "\r\n";
766

    private String generateInfoFile() {
        logger.fine("Generating info file");
        StringBuffer info = new StringBuffer();

        JsonArray contactsArray = new JsonArray();
        /* Contact, and its subfields, are terms from citation.tsv whose mapping to a formal vocabulary and label in the oremap may change
         * so we need to find the labels used.
         */
        JsonLDTerm contactTerm = oremap.getContactTerm();
        if ((contactTerm != null) && aggregation.has(contactTerm.getLabel())) {

            JsonElement contacts = aggregation.get(contactTerm.getLabel());
            JsonLDTerm contactNameTerm = oremap.getContactNameTerm();
            JsonLDTerm contactEmailTerm = oremap.getContactEmailTerm();

            if (contacts.isJsonArray()) {
                // Populate contactsArray from the contacts element so the loop below has entries to process
                contactsArray = contacts.getAsJsonArray();
                for (int i = 0; i < contactsArray.size(); i++) {
                    info.append("Contact-Name: ");
                    JsonElement person = contactsArray.get(i);
                    if (person.isJsonPrimitive()) {
                        info.append(person.getAsString());
                        info.append(CRLF);

                    } else {
                        if (contactNameTerm != null) {
                            info.append(((JsonObject) person).get(contactNameTerm.getLabel()).getAsString());
                            info.append(CRLF);
                        }
                        if ((contactEmailTerm != null) && ((JsonObject) person).has(contactEmailTerm.getLabel())) {
                            info.append("Contact-Email: ");
                            info.append(((JsonObject) person).get(contactEmailTerm.getLabel()).getAsString());
                            info.append(CRLF);
                        }
                    }
                }
            } else {
                info.append("Contact-Name: ");

                if (contacts.isJsonPrimitive()) {
                    info.append((String) contacts.getAsString());
                    info.append(CRLF);

                } else {
                    JsonObject person = contacts.getAsJsonObject();
                    if (contactNameTerm != null) {
                        info.append(person.get(contactNameTerm.getLabel()).getAsString());
                        info.append(CRLF);
                    }
                    if ((contactEmailTerm != null) && (person.has(contactEmailTerm.getLabel()))) {
                        info.append("Contact-Email: ");
                        info.append(person.get(contactEmailTerm.getLabel()).getAsString());
                        info.append(CRLF);
                    }
                }

            }
        } else {
            logger.warning("No contact info available for BagIt Info file");
        }

        String orgName = JvmSettings.BAGIT_SOURCE_ORG_NAME.lookupOptional(String.class).orElse("Dataverse Installation (<Site Url>)");
        String orgAddress = JvmSettings.BAGIT_SOURCEORG_ADDRESS.lookupOptional(String.class).orElse("<Full address>");
        String orgEmail = JvmSettings.BAGIT_SOURCEORG_EMAIL.lookupOptional(String.class).orElse("<Email address>");

        info.append("Source-Organization: " + orgName);
        // ToDo - make configurable
        info.append(CRLF);

        info.append("Organization-Address: " + WordUtils.wrap(orgAddress, 78, CRLF + " ", true));

        info.append(CRLF);

        // Not a BagIt standard name
        info.append("Organization-Email: " + orgEmail);
        info.append(CRLF);

        info.append("External-Description: ");

        /* Description, and its subfields, are terms from citation.tsv whose mapping to a formal vocabulary and label in the oremap may change
         * so we need to find the labels used.
         */
        JsonLDTerm descriptionTerm = oremap.getDescriptionTerm();
        JsonLDTerm descriptionTextTerm = oremap.getDescriptionTextTerm();
        if (descriptionTerm == null) {
            logger.warning("No description available for BagIt Info file");
        } else {
            info.append(
                    // FixMe - handle description having subfields better
                    WordUtils.wrap(getSingleValue(aggregation.get(descriptionTerm.getLabel()),
                            descriptionTextTerm.getLabel()), 78, CRLF + " ", true));

            info.append(CRLF);
        }
        info.append("Bagging-Date: ");
        info.append((new SimpleDateFormat("yyyy-MM-dd").format(Calendar.getInstance().getTime())));
        info.append(CRLF);

        info.append("External-Identifier: ");
        info.append(aggregation.get("@id").getAsString());
        info.append(CRLF);

        info.append("Bag-Size: ");
        info.append(byteCountToDisplaySize(totalDataSize));
        info.append(CRLF);

        info.append("Payload-Oxum: ");
        info.append(Long.toString(totalDataSize));
        info.append(".");
        info.append(Long.toString(dataCount));
        info.append(CRLF);

        info.append("Internal-Sender-Identifier: ");
        String catalog = BundleUtil.getStringFromBundle("bagit.sourceOrganization") + " Catalog";
        if (aggregation.has(JsonLDTerm.schemaOrg("includedInDataCatalog").getLabel())) {
            catalog = aggregation.get(JsonLDTerm.schemaOrg("includedInDataCatalog").getLabel()).getAsString();
        }
        info.append(catalog + ":" + aggregation.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString());
        info.append(CRLF);

        return info.toString();

    }

    /**
     * Kludge - compound values (e.g. for descriptions) are sent as an array of
     * objects containing key/values whereas a single value is sent as one object.
     * For cases where multiple values are sent, create a concatenated string so
     * that information is not lost.
     *
     * @param jsonElement
     *            - the root json object
     * @param key
     *            - the key to find a value(s) for
     * @return - a single string
     */
    String getSingleValue(JsonElement jsonElement, String key) {
        String val = "";
        if (jsonElement.isJsonObject()) {
            JsonObject jsonObject = jsonElement.getAsJsonObject();
            val = jsonObject.get(key).getAsString();
        } else if (jsonElement.isJsonArray()) {

            Iterator<JsonElement> iter = jsonElement.getAsJsonArray().iterator();
            ArrayList<String> stringArray = new ArrayList<String>();
            while (iter.hasNext()) {
                stringArray.add(iter.next().getAsJsonObject().getAsJsonPrimitive(key).getAsString());
            }
            if (stringArray.size() > 1) {
                val = String.join(",", stringArray);
            } else {
                val = stringArray.get(0);
            }
            logger.fine("Multiple values found for: " + key + ": " + val);
        }
        return val;
    }
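    // Illustrative note (not part of the original source): for a compound description
    // sent as an array, e.g. [{"text": "First part"}, {"text": "Second part"}], calling
    // getSingleValue(element, "text") returns the concatenated "First part,Second part",
    // while a single object {"text": "Only part"} returns "Only part". The key name
    // "text" here is hypothetical; the real label is looked up from the OREMap terms.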

    // Used in validation

    public void incrementTotalDataSize(long inc) {
        totalDataSize += inc;
    }

    public ChecksumType getHashtype() {
        return hashtype;
    }

    // Get's all "Has Part" children, standardized to send an array with 0,1, or
    // more elements
    private static JsonArray getChildren(JsonObject parent) {
        JsonElement o = null;
        o = parent.get(JsonLDTerm.schemaOrg("hasPart").getLabel());
        if (o == null) {
            return new JsonArray();
        } else {
            if (o.isJsonArray()) {
                return (JsonArray) o;
            } else if (o.isJsonPrimitive()) {
                JsonArray children = new JsonArray();
                children.add(o);
                return (children);
            }
            logger.severe("Error finding children: " + o.toString());
            return new JsonArray();
        }
    }

    // Logic to decide if this is a container -
    // first check for children, then check for source-specific type indicators
    private static boolean childIsContainer(JsonObject item) {
        if (getChildren(item).size() != 0) {
            return true;
        }
        // Also check for any indicative type
        Object o = item.get("@type");
        if (o != null) {
            if (o instanceof JSONArray) {
                // As part of an array
                for (int i = 0; i < ((JSONArray) o).length(); i++) {
                    String type = ((JSONArray) o).getString(i).trim();
                    if ("http://cet.ncsa.uiuc.edu/2016/Folder".equals(type)) {
                        return true;
                    }
                }
            } else if (o instanceof String) {
                // Or as the only type
                String type = ((String) o).trim();
                if ("http://cet.ncsa.uiuc.edu/2016/Folder".equals(type)) {
                    return true;
                }
            }
        }
        return false;
    }

    public String getBagPath() {
        return bagPath;
    }

    public void setBagPath(String bagPath) {
        this.bagPath = bagPath;
    }

    private HttpGet createNewGetRequest(URI url, String returnType) {

        HttpGet request = null;

        if (apiKey != null) {
            try {
                String urlString = url.toURL().toString();
                // Add key as param - check whether it is the only param or not
                urlString = urlString + ((urlString.indexOf('?') != -1) ? "&key=" : "?key=") + apiKey;
                request = new HttpGet(new URI(urlString));
            } catch (MalformedURLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (URISyntaxException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        } else {
            request = new HttpGet(url);
        }
        if (returnType != null) {
            request.addHeader("accept", returnType);
        }
        return request;
    }

    InputStreamSupplier getInputStreamSupplier(final String uriString) {

        return new InputStreamSupplier() {
            public InputStream get() {
                try {
                    URI uri = new URI(uriString);

                    int tries = 0;
                    while (tries < 5) {

                        logger.fine("Get # " + tries + " for " + uriString);
                        HttpGet getFile = createNewGetRequest(uri, null);
                        logger.finest("Retrieving " + tries + ": " + uriString);
                        CloseableHttpResponse response = null;
                        try {
                            response = client.execute(getFile);
                            // Note - if we ever need to pass an HttpClientContext, we need a new one per
                            // thread.
                            int statusCode = response.getStatusLine().getStatusCode();
                            if (statusCode == 200) {
                                logger.finest("Retrieved: " + uri);
                                return response.getEntity().getContent();
                            }
                            logger.warning("Attempt: " + tries + " - Unexpected Status when retrieving " + uriString
                                    + " : " + statusCode);
                            if (statusCode < 500) {
                                logger.fine("Will not retry for 40x errors");
                                tries += 5;
                            } else {
                                tries++;
                            }
                            // Error handling
                            if (response != null) {
                                try {
                                    EntityUtils.consumeQuietly(response.getEntity());
                                    response.close();
                                } catch (IOException io) {
                                    logger.warning(
                                            "Exception closing response after status: " + statusCode + " on " + uri);
                                }
                            }
                        } catch (ClientProtocolException e) {
                            tries += 5;
                            // TODO Auto-generated catch block
                            e.printStackTrace();
                        } catch (IOException e) {
                            // Retry if this is a potentially temporary error such
                            // as a timeout
                            tries++;
                            logger.log(Level.WARNING, "Attempt# " + tries + " : Unable to retrieve file: " + uriString,
                                    e);
                            if (tries == 5) {
                                logger.severe("Final attempt failed for " + uriString);
                            }
                            e.printStackTrace();
                        }

                    }

                } catch (URISyntaxException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
                logger.severe("Could not read: " + uriString);
                return null;
            }
        };
    }

    /**
     * Adapted from org/apache/commons/io/FileUtils.java change to SI - add 2 digits
     * of precision
     */
    /**
     * The number of bytes in a kilobyte.
     */
    public static final long ONE_KB = 1000;

    /**
     * The number of bytes in a megabyte.
     */
    public static final long ONE_MB = ONE_KB * ONE_KB;

    /**
     * The number of bytes in a gigabyte.
     */
    public static final long ONE_GB = ONE_KB * ONE_MB;

    /**
     * Returns a human-readable version of the file size, where the input represents
     * a specific number of bytes.
     *
     * @param size
     *            the number of bytes
     * @return a human-readable display value (includes units)
     */
    public static String byteCountToDisplaySize(long size) {
        String displaySize;

        if (size / ONE_GB > 0) {
            displaySize = String.valueOf(Math.round(size / (ONE_GB / 100.0d)) / 100.0) + " GB";
        } else if (size / ONE_MB > 0) {
            displaySize = String.valueOf(Math.round(size / (ONE_MB / 100.0d)) / 100.0) + " MB";
        } else if (size / ONE_KB > 0) {
            displaySize = String.valueOf(Math.round(size / (ONE_KB / 100.0d)) / 100.0) + " KB";
        } else {
            displaySize = String.valueOf(size) + " bytes";
        }
        return displaySize;
    }

1127
    public void setAuthenticationKey(String tokenString) {
1128
        apiKey = tokenString;
×
1129
    }
×
1130

1131
    public void setNumConnections(int numConnections) {
UNCOV
1132
        this.numConnections = numConnections;
×
UNCOV
1133
        logger.fine("BagGenerator will use " + numConnections + " threads");
×
UNCOV
1134
    }
×
1135

1136
}
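
For orientation, the sketch below shows how a caller might drive this class; it is not part of BagGenerator.java. It assumes an OREMap for the dataset version and the matching DataCite XML have already been produced elsewhere, and the method name, bag identifier, and API token variables are hypothetical. Only methods that appear in the listing above are used.

    // Hypothetical caller (illustration only, not part of BagGenerator.java).
    public static boolean archiveVersion(OREMap oreMap, String dataciteXml,
            String bagId, String apiToken) throws Exception {
        BagGenerator bagger = new BagGenerator(oreMap, dataciteXml);
        bagger.setBagPath("/tmp/bags");        // parent directory for the generated <valid-name>.zip
        bagger.setAuthenticationKey(apiToken); // appended as ?key=... when the file URLs are fetched
        // Writes the zipped bag to disk and then re-reads it to verify the manifest hashes.
        return bagger.generateBag(bagId, false);
    }

Passing true as the second argument instead writes to a .tmp file first and renames it over any existing bag once generation and validation succeed.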