IQSS / dataverse / #22985

23 Aug 2024 06:32PM UTC · coverage: 20.61% (-0.2%) from 20.791%

Pull Request #10781: Improved handling of Globus uploads (via github)
landreev: added an upfront locks check to the /addGlobusFiles api #10623

4 of 417 new or added lines in 15 files covered. (0.96%)

4194 existing lines in 35 files now uncovered.

17388 of 84365 relevant lines covered (20.61%)

0.21 hits per line

Source File

0.0% file coverage: /src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
package edu.harvard.iq.dataverse.engine.command.impl;

import com.google.auth.oauth2.ServiceAccountCredentials;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.Bucket;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageException;
import com.google.cloud.storage.StorageOptions;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetLock.Reason;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.users.ApiToken;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.workflow.step.Failure;
import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;
import org.apache.commons.codec.binary.Hex;

import jakarta.json.Json;
import jakarta.json.JsonObjectBuilder;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.nio.charset.Charset;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.Map;
import java.util.logging.Logger;

@RequiredPermissions(Permission.PublishDataset)
public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command<DatasetVersion> {

    private static final Logger logger = Logger.getLogger(GoogleCloudSubmitToArchiveCommand.class.getName());

    private static final String GOOGLECLOUD_BUCKET = ":GoogleCloudBucket";
    private static final String GOOGLECLOUD_PROJECT = ":GoogleCloudProject";

    public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) {
        super(aRequest, version);
    }

    // Note: the 'success' flag and the getDataCiteXml()/startBagThread()
    // helpers used below come from the AbstractSubmitToArchiveCommand superclass.
    @Override
    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map<String, String> requestedSettings) {
        logger.fine("In GoogleCloudSubmitToArchiveCommand...");
        String bucketName = requestedSettings.get(GOOGLECLOUD_BUCKET);
        String projectName = requestedSettings.get(GOOGLECLOUD_PROJECT);
        logger.fine("Project: " + projectName + " Bucket: " + bucketName);
        if (bucketName != null && projectName != null) {
            Storage storage;
            // Set a failure status that will be updated if we succeed
            JsonObjectBuilder statusObject = Json.createObjectBuilder();
            statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE);
            statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred");

            String cloudKeyFile = JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "googlecloudkey.json";

            try (FileInputStream cloudKeyStream = new FileInputStream(cloudKeyFile)) {
                storage = StorageOptions.newBuilder()
                        .setCredentials(ServiceAccountCredentials.fromStream(cloudKeyStream))
                        .setProjectId(projectName)
                        .build()
                        .getService();
                Bucket bucket = storage.get(bucketName);

                Dataset dataset = dv.getDataset();
                if (dataset.getLockFor(Reason.finalizePublication) == null) {

                    String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
                            .replace('.', '-').toLowerCase();

                    String dataciteXml = getDataCiteXml(dv);
                    MessageDigest messageDigest = MessageDigest.getInstance("MD5");
                    try (PipedInputStream dataciteIn = new PipedInputStream();
                            DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) {
                        // Add datacite.xml file

                        Thread dcThread = new Thread(new Runnable() {
                            public void run() {
                                try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) {
                                    dataciteOut.write(dataciteXml.getBytes(Charset.forName("utf-8")));
                                    dataciteOut.close();
                                    success = true;
                                } catch (Exception e) {
                                    logger.severe("Error creating datacite.xml: " + e.getMessage());
                                    e.printStackTrace();
                                    // throw new RuntimeException("Error creating datacite.xml: " + e.getMessage());
                                }
                            }
                        });
                        dcThread.start();
                        // Have seen Pipe Closed errors for other archivers when used as a workflow
                        // without this delay loop
                        int i = 0;
                        while (digestInputStream.available() <= 0 && i < 100) {
                            Thread.sleep(10);
                            i++;
                        }
                        Blob dcXml = bucket.create(spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml", digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist());

                        dcThread.join();
                        String checksum = dcXml.getMd5ToHexString();
                        logger.fine("Content: datacite.xml added with checksum: " + checksum);
                        String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest());
                        if (!success || !checksum.equals(localchecksum)) {
                            logger.severe("Failure on " + spaceName);
                            logger.severe(success ? checksum + " not equal to " + localchecksum : "datacite.xml transfer did not succeed");
                            try {
                                dcXml.delete(Blob.BlobSourceOption.generationMatch());
                            } catch (StorageException se) {
                                logger.warning(se.getMessage());
                            }
                            return new Failure("Error in transferring DataCite.xml file to GoogleCloud",
                                    "GoogleCloud Submission Failure: incomplete metadata transfer");
                        }

                        // Store BagIt file
                        success = false;
                        String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip";

                        // Add BagIt ZIP file
                        // Google uses MD5 as one way to verify the transfer
                        messageDigest = MessageDigest.getInstance("MD5");
                        try (PipedInputStream in = new PipedInputStream(100000);
                                DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) {
                            Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token);
                            Blob bag = bucket.create(spaceName + "/" + fileName, digestInputStream2, "application/zip",
                                    Bucket.BlobWriteOption.doesNotExist());
                            if (bag.getSize() == 0) {
                                throw new IOException("Empty Bag");
                            }
                            bagThread.join();

                            checksum = bag.getMd5ToHexString();
                            logger.fine("Bag: " + fileName + " added with checksum: " + checksum);
                            localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest());
                            if (!success || !checksum.equals(localchecksum)) {
                                logger.severe(success ? checksum + " not equal to " + localchecksum
                                        : "bag transfer did not succeed");
                                try {
                                    bag.delete(Blob.BlobSourceOption.generationMatch());
                                } catch (StorageException se) {
                                    logger.warning(se.getMessage());
                                }
                                return new Failure("Error in transferring Zip file to GoogleCloud",
                                        "GoogleCloud Submission Failure: incomplete archive transfer");
                            }
                        }

                        logger.fine("GoogleCloud Submission step: Content Transferred");

                        // Document the location of the dataset's archival copy (actually the URL
                        // where an admin can view it); points at the bucket where the zip and
                        // datacite.xml are visible
                        StringBuffer sb = new StringBuffer("https://console.cloud.google.com/storage/browser/");
                        sb.append(bucketName + "/" + spaceName);
                        statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS);
                        statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString());

                    }
                } else {
                    logger.warning("GoogleCloud Submission Workflow aborted: Dataset locked for finalizePublication");
                    return new Failure("Dataset locked");
                }
            } catch (Exception e) {
                logger.warning(e.getLocalizedMessage());
                e.printStackTrace();
                return new Failure("GoogleCloud Submission Failure",
                        e.getLocalizedMessage() + ": check log for details");
            } finally {
                dv.setArchivalCopyLocation(statusObject.build().toString());
            }
            return WorkflowStepResult.OK;
        } else {
            return new Failure("GoogleCloud Submission not configured - no \":GoogleCloudBucket\" and/or \":GoogleCloudProject\".");
        }
    }

}
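
The upload-and-verify pattern at the heart of this command is easy to miss among the Google Cloud specifics: a producer thread writes content into a PipedOutputStream, the main thread streams the connected PipedInputStream through a DigestInputStream so the MD5 is computed while the bytes flow to the bucket, and the locally computed digest is then compared against the MD5 the storage service reports (Blob.getMd5ToHexString() above). The following is a minimal, self-contained sketch of that pattern using only the JDK (Java 17+ for HexFormat). The class name and the ByteArrayOutputStream standing in for the remote bucket are illustrative assumptions, not part of the Dataverse code.

import java.io.ByteArrayOutputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.nio.charset.StandardCharsets;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.HexFormat;

public class PipedDigestDemo {
    public static void main(String[] args) throws Exception {
        byte[] payload = "datacite.xml contents".getBytes(StandardCharsets.UTF_8);

        MessageDigest md = MessageDigest.getInstance("MD5");
        try (PipedInputStream in = new PipedInputStream();
                DigestInputStream digestIn = new DigestInputStream(in, md)) {

            // Producer thread: writes the payload into the pipe, as dcThread
            // writes datacite.xml in the command above.
            Thread producer = new Thread(() -> {
                try (PipedOutputStream out = new PipedOutputStream(in)) {
                    out.write(payload);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            });
            producer.start();

            // Consumer: stands in for bucket.create(...), which drains the
            // DigestInputStream while uploading; here we just copy the bytes
            // into an in-memory "remote" store (an illustrative stand-in).
            ByteArrayOutputStream remote = new ByteArrayOutputStream();
            digestIn.transferTo(remote);
            producer.join();

            // Compare the MD5 computed while streaming against an MD5 of what
            // the remote side actually received; the command above gets the
            // remote digest from Blob.getMd5ToHexString().
            String local = HexFormat.of().formatHex(digestIn.getMessageDigest().digest());
            String stored = HexFormat.of().formatHex(
                    MessageDigest.getInstance("MD5").digest(remote.toByteArray()));
            System.out.println(local.equals(stored) ? "checksums match" : "transfer corrupted");
        }
    }
}

The same producer/consumer structure is presumably why the command polls digestInputStream.available() before calling bucket.create(): the upload starts reading the pipe immediately, and giving the producer thread a head start avoids the "Pipe closed" races noted in the code comment.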