• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

kit-data-manager / ro-crate-java / #432

19 May 2025 03:13PM UTC coverage: 90.791% (+0.6%) from 90.169%
#432

Pull #258

github

web-flow
Merge 1e2da1171 into 810d1995c
Pull Request #258: Support .ELN-style crates in all zip readers and writers

235 of 253 new or added lines in 25 files covered. (92.89%)

1 existing line in 1 file now uncovered.

1962 of 2161 relevant lines covered (90.79%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.08
/src/main/java/edu/kit/datamanager/ro_crate/reader/ReadZipStreamStrategy.java
1
package edu.kit.datamanager.ro_crate.reader;
2

3
import com.fasterxml.jackson.databind.ObjectMapper;
4
import com.fasterxml.jackson.databind.node.ObjectNode;
5
import edu.kit.datamanager.ro_crate.entities.contextual.JsonDescriptor;
6
import edu.kit.datamanager.ro_crate.objectmapper.MyObjectMapper;
7
import java.io.File;
8
import java.io.FileOutputStream;
9
import java.io.IOException;
10
import java.io.InputStream;
11
import java.io.OutputStream;
12
import java.nio.file.Path;
13
import java.util.UUID;
14

15
import edu.kit.datamanager.ro_crate.util.FileSystemUtil;
16
import net.lingala.zip4j.io.inputstream.ZipInputStream;
17
import net.lingala.zip4j.model.LocalFileHeader;
18
import org.apache.commons.io.FileUtils;
19
import org.apache.commons.io.filefilter.FileFilterUtils;
20
import org.slf4j.Logger;
21
import org.slf4j.LoggerFactory;
22

23
/**
24
 * Reads a crate from a streamed ZIP archive.
25
 * <p>
26
 * This class handles reading and extraction of RO-Crate content from ZIP archives
27
 * into a temporary directory structure on the file system,
28
 * which allows accessing the contained files.
29
 * <p>
30
 * Supports <a href=https://github.com/TheELNConsortium/TheELNFileFormat>ELN-Style crates</a>,
31
 * meaning the crate may be either in the zip archive directly or in a single,
32
 * direct subfolder beneath the root folder (/folder).
33
 * <p>
34
 * Note: This implementation checks for up to 50 subdirectories if multiple are present.
35
 * This is to avoid zip bombs, which may contain a lot of subdirectories,
36
 * and at the same time gracefully handle valid crates with hidden subdirectories
37
 * (for example, thumbnails).
38
 * <p>
39
 * NOTE: The resulting crate may refer to these temporary files. Therefore,
40
 * these files are only being deleted before the JVM exits. If you need to free
41
 * space because your application is long-running or creates a lot of
42
 * crates, you may use the getters to retrieve information which will help
43
 * you to clean up manually. Keep in mind that crates may refer to this
44
 * folder after extraction. Use RoCrateWriter to export it to some
45
 * persistent location and possibly read it from there, if required. Or use
46
 * the ZipWriter to write it back to its source.
47
 *
48
 * @author jejkal
49
 */
50
public class ReadZipStreamStrategy implements GenericReaderStrategy<InputStream> {
51

52
    private static final Logger logger = LoggerFactory.getLogger(ReadZipStreamStrategy.class);
1✔
53
    protected final String ID = UUID.randomUUID().toString();
1✔
54
    protected Path temporaryFolder = Path.of(String.format("./.tmp/ro-crate-java/zipStreamReader/%s/", ID));
1✔
55
    protected boolean isExtracted = false;
1✔
56

57
    /**
58
     * Crates an instance with the default configuration.
59
     * <p>
60
     * The default configuration is to extract the ZipFile to
61
     * `./.tmp/ro-crate-java/zipStreamReader/%UUID/`.
62
     */
63
    public ReadZipStreamStrategy() {}
1✔
64

65
    /**
66
     * Creates a ZipStreamReader which will extract the contents temporary to
67
     * the given location instead of the default location.
68
     *
69
     * @param folderPath the custom directory to extract content to for
70
     * temporary access.
71
     * @param shallAddUuidSubfolder if true, the reader will extract into
72
     * subdirectories of the given directory. These subdirectories will have
73
     * UUIDs as their names.
74
     */
75
    public ReadZipStreamStrategy(Path folderPath, boolean shallAddUuidSubfolder) {
1✔
76
        if (shallAddUuidSubfolder) {
1✔
77
            this.temporaryFolder = folderPath.resolve(ID);
1✔
78
        } else {
79
            this.temporaryFolder = folderPath;
1✔
80
        }
81
    }
1✔
82

83
    /**
84
     * @return the identifier which may be used as the name for a subfolder in
85
     * the temporary directory.
86
     */
87
    public String getID() {
88
        return ID;
1✔
89
    }
90

91
    /**
92
     * @return the folder (considered temporary) where the zipped crate will be
93
     * or has been extracted to.
94
     */
95
    public Path getTemporaryFolder() {
96
        return temporaryFolder;
1✔
97
    }
98

99
    /**
100
     * @return whether the crate has already been extracted into the temporary
101
     * folder.
102
     */
103
    public boolean isExtracted() {
104
        return isExtracted;
1✔
105
    }
106

107
    /**Read the crate metadata and content from the provided input stream.
108
     * 
109
     * @param stream The input stream.
110
     */
111
    private void readCrate(InputStream stream) throws IOException {
112
        File folder = temporaryFolder.toFile();
1✔
113
        FileSystemUtil.mkdirOrDeleteContent(folder);
1✔
114

115
        LocalFileHeader localFileHeader;
116
        int readLen;
117
        byte[] readBuffer = new byte[4096];
1✔
118

119
        try (ZipInputStream zipInputStream = new ZipInputStream(stream)) {
1✔
120
            while ((localFileHeader = zipInputStream.getNextEntry()) != null) {
1✔
121
                String fileName = localFileHeader.getFileName();
1✔
122
                File extractedFile = new File(folder, fileName).getCanonicalFile();
1✔
123
                if (!extractedFile.toPath().startsWith(folder.getCanonicalPath())) {
1✔
NEW
124
                    throw new IOException("Entry is outside of target directory: " + fileName);
×
125
                }
126
                if (localFileHeader.isDirectory()) {
1✔
127
                    FileUtils.forceMkdir(extractedFile);
1✔
128
                    continue;
1✔
129
                }
130
                FileUtils.forceMkdir(extractedFile.getParentFile());
1✔
131
                try (OutputStream outputStream = new FileOutputStream(extractedFile)) {
1✔
132
                    while ((readLen = zipInputStream.read(readBuffer)) != -1) {
1✔
133
                        outputStream.write(readBuffer, 0, readLen);
1✔
134
                    }
135
                }
136
            }
1✔
137
        }
138
        this.isExtracted = true;
1✔
139
        // register deletion on exit
140
        FileUtils.forceDeleteOnExit(folder);
1✔
141
    }
1✔
142

143
    @Override
144
    public ObjectNode readMetadataJson(InputStream stream) throws IOException {
145
        if (!isExtracted) {
1✔
146
            this.readCrate(stream);
1✔
147
        }
148

149
        ObjectMapper objectMapper = MyObjectMapper.getMapper();
1✔
150
        File jsonMetadata = temporaryFolder.resolve(JsonDescriptor.ID).toFile();
1✔
151
        if (!jsonMetadata.isFile()) {
1✔
152
            // Try to find the metadata file in subdirectories
153
            File firstSubdir = FileUtils.listFilesAndDirs(
1✔
154
                            temporaryFolder.toFile(),
1✔
155
                            FileFilterUtils.directoryFileFilter(),
1✔
156
                            null
157
                    )
158
                    .stream()
1✔
159
                    .limit(50)
1✔
160
                    .filter(file -> file.toPath().toAbsolutePath().resolve(JsonDescriptor.ID).toFile().isFile())
1✔
161
                    .findFirst()
1✔
162
                    .orElseThrow(() -> new IllegalStateException("No %s found in zip file".formatted(JsonDescriptor.ID)));
1✔
163
            jsonMetadata = firstSubdir.toPath().resolve(JsonDescriptor.ID).toFile();
1✔
164
        }
165
        return objectMapper.readTree(jsonMetadata).deepCopy();
1✔
166
    }
167

168
    @Override
169
    public File readContent(InputStream stream) throws IOException {
170
        if (!isExtracted) {
1✔
NEW
171
            this.readCrate(stream);
×
172
        }
173
        return temporaryFolder.toFile();
1✔
174
    }
175
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc