• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

kit-data-manager / ro-crate-java / #431

19 May 2025 02:23PM UTC coverage: 90.418% (+0.2%) from 90.169%
#431

Pull #258

github

web-flow
Merge 896018de9 into 810d1995c
Pull Request #258: Support .ELN-style crates in all zip readers and writers

283 of 315 new or added lines in 25 files covered. (89.84%)

1 existing line in 1 file now uncovered.

2010 of 2223 relevant lines covered (90.42%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.86
/src/main/java/edu/kit/datamanager/ro_crate/reader/ReadZipStreamStrategy.java
1
package edu.kit.datamanager.ro_crate.reader;
2

3
import com.fasterxml.jackson.databind.ObjectMapper;
4
import com.fasterxml.jackson.databind.node.ObjectNode;
5
import edu.kit.datamanager.ro_crate.entities.contextual.JsonDescriptor;
6
import edu.kit.datamanager.ro_crate.objectmapper.MyObjectMapper;
7
import java.io.File;
8
import java.io.FileOutputStream;
9
import java.io.IOException;
10
import java.io.InputStream;
11
import java.io.OutputStream;
12
import java.nio.file.Path;
13
import java.util.UUID;
14
import net.lingala.zip4j.io.inputstream.ZipInputStream;
15
import net.lingala.zip4j.model.LocalFileHeader;
16
import org.apache.commons.io.FileUtils;
17
import org.apache.commons.io.filefilter.FileFilterUtils;
18
import org.slf4j.Logger;
19
import org.slf4j.LoggerFactory;
20

21
/**
22
 * Reads a crate from a streamed ZIP archive.
23
 * <p>
24
 * This class handles reading and extraction of RO-Crate content from ZIP archives
25
 * into a temporary directory structure on the file system,
26
 * which allows accessing the contained files.
27
 * <p>
28
 * Supports <a href=https://github.com/TheELNConsortium/TheELNFileFormat>ELN-Style crates</a>,
29
 * meaning the crate may be either in the zip archive directly or in a single,
30
 * direct subfolder beneath the root folder (/folder).
31
 * <p>
32
 * Note: This implementation checks for up to 50 subdirectories if multiple are present.
33
 * This is to avoid zip bombs, which may contain a lot of subdirectories,
34
 * and at the same time gracefully handle valid crates with hidden subdirectories
35
 * (for example, thumbnails).
36
 * <p>
37
 * NOTE: The resulting crate may refer to these temporary files. Therefore,
38
 * these files are only being deleted before the JVM exits. If you need to free
39
 * space because your application is long-running or creates a lot of
40
 * crates, you may use the getters to retrieve information which will help
41
 * you to clean up manually. Keep in mind that crates may refer to this
42
 * folder after extraction. Use RoCrateWriter to export it to some
43
 * persistent location and possibly read it from there, if required. Or use
44
 * the ZipWriter to write it back to its source.
45
 *
46
 * @author jejkal
47
 */
48
public class ReadZipStreamStrategy implements GenericReaderStrategy<InputStream> {
49

50
    private static final Logger logger = LoggerFactory.getLogger(ReadZipStreamStrategy.class);
1✔
51
    protected final String ID = UUID.randomUUID().toString();
1✔
52
    protected Path temporaryFolder = Path.of(String.format("./.tmp/ro-crate-java/zipStreamReader/%s/", ID));
1✔
53
    protected boolean isExtracted = false;
1✔
54

55
    /**
56
     * Crates an instance with the default configuration.
57
     * <p>
58
     * The default configuration is to extract the ZipFile to
59
     * `./.tmp/ro-crate-java/zipStreamReader/%UUID/`.
60
     */
61
    public ReadZipStreamStrategy() {}
1✔
62

63
    /**
64
     * Creates a ZipStreamReader which will extract the contents temporary to
65
     * the given location instead of the default location.
66
     *
67
     * @param folderPath the custom directory to extract content to for
68
     * temporary access.
69
     * @param shallAddUuidSubfolder if true, the reader will extract into
70
     * subdirectories of the given directory. These subdirectories will have
71
     * UUIDs as their names.
72
     */
73
    public ReadZipStreamStrategy(Path folderPath, boolean shallAddUuidSubfolder) {
1✔
74
        if (shallAddUuidSubfolder) {
1✔
75
            this.temporaryFolder = folderPath.resolve(ID);
1✔
76
        } else {
77
            this.temporaryFolder = folderPath;
1✔
78
        }
79
    }
1✔
80

81
    /**
82
     * @return the identifier which may be used as the name for a subfolder in
83
     * the temporary directory.
84
     */
85
    public String getID() {
86
        return ID;
1✔
87
    }
88

89
    /**
90
     * @return the folder (considered temporary) where the zipped crate will be
91
     * or has been extracted to.
92
     */
93
    public Path getTemporaryFolder() {
94
        return temporaryFolder;
1✔
95
    }
96

97
    /**
98
     * @return whether the crate has already been extracted into the temporary
99
     * folder.
100
     */
101
    public boolean isExtracted() {
102
        return isExtracted;
1✔
103
    }
104

105
    /**Read the crate metadata and content from the provided input stream.
106
     * 
107
     * @param stream The input stream.
108
     */
109
    private void readCrate(InputStream stream) throws IOException {
110
        File folder = temporaryFolder.toFile();
1✔
111
        // ensure the directory is clean
112
        if (folder.exists()) {
1✔
113
            if (folder.isDirectory()) {
1✔
114
                FileUtils.cleanDirectory(folder);
1✔
NEW
115
            } else if (folder.isFile()) {
×
NEW
116
                FileUtils.delete(folder);
×
117
            }
118
        } else {
119
            FileUtils.forceMkdir(folder);
1✔
120
        }
121

122
        LocalFileHeader localFileHeader;
123
        int readLen;
124
        byte[] readBuffer = new byte[4096];
1✔
125

126
        try (ZipInputStream zipInputStream = new ZipInputStream(stream)) {
1✔
127
            while ((localFileHeader = zipInputStream.getNextEntry()) != null) {
1✔
128
                String fileName = localFileHeader.getFileName();
1✔
129
                File extractedFile = new File(folder, fileName).getCanonicalFile();
1✔
130
                if (!extractedFile.toPath().startsWith(folder.getCanonicalPath())) {
1✔
NEW
131
                    throw new IOException("Entry is outside of target directory: " + fileName);
×
132
                }
133
                if (localFileHeader.isDirectory()) {
1✔
134
                    FileUtils.forceMkdir(extractedFile);
1✔
135
                    continue;
1✔
136
                }
137
                FileUtils.forceMkdir(extractedFile.getParentFile());
1✔
138
                try (OutputStream outputStream = new FileOutputStream(extractedFile)) {
1✔
139
                    while ((readLen = zipInputStream.read(readBuffer)) != -1) {
1✔
140
                        outputStream.write(readBuffer, 0, readLen);
1✔
141
                    }
142
                }
143
            }
1✔
144
        }
145
        this.isExtracted = true;
1✔
146
        // register deletion on exit
147
        FileUtils.forceDeleteOnExit(folder);
1✔
148
    }
1✔
149

150
    @Override
151
    public ObjectNode readMetadataJson(InputStream stream) throws IOException {
152
        if (!isExtracted) {
1✔
153
            this.readCrate(stream);
1✔
154
        }
155

156
        ObjectMapper objectMapper = MyObjectMapper.getMapper();
1✔
157
        File jsonMetadata = temporaryFolder.resolve(JsonDescriptor.ID).toFile();
1✔
158
        if (!jsonMetadata.isFile()) {
1✔
159
            // Try to find the metadata file in subdirectories
160
            File firstSubdir = FileUtils.listFilesAndDirs(
1✔
161
                            temporaryFolder.toFile(),
1✔
162
                            FileFilterUtils.directoryFileFilter(),
1✔
163
                            null
164
                    )
165
                    .stream()
1✔
166
                    .limit(50)
1✔
167
                    .filter(file -> file.toPath().toAbsolutePath().resolve(JsonDescriptor.ID).toFile().isFile())
1✔
168
                    .findFirst()
1✔
169
                    .orElseThrow(() -> new IllegalStateException("No %s found in zip file".formatted(JsonDescriptor.ID)));
1✔
170
            jsonMetadata = firstSubdir.toPath().resolve(JsonDescriptor.ID).toFile();
1✔
171
        }
172
        return objectMapper.readTree(jsonMetadata).deepCopy();
1✔
173
    }
174

175
    @Override
176
    public File readContent(InputStream stream) throws IOException {
177
        if (!isExtracted) {
1✔
NEW
178
            this.readCrate(stream);
×
179
        }
180
        return temporaryFolder.toFile();
1✔
181
    }
182
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc