• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #22987

23 Aug 2024 06:44PM CUT coverage: 20.61% (-0.2%) from 20.791%
#22987

Pull #10781

github

landreev
added an upfront locks check to the /addGlobusFiles api #10623
Pull Request #10781: Improved handling of Globus uploads

4 of 417 new or added lines in 15 files covered. (0.96%)

4194 existing lines in 35 files now uncovered.

17388 of 84365 relevant lines covered (20.61%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.48
/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DataReader.java
1
package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta;
2

3
import java.io.BufferedInputStream;
4
import java.io.IOException;
5
import java.nio.ByteBuffer;
6
import java.nio.ByteOrder;
7
import java.util.Arrays;
8
import java.util.logging.Logger;
9

10
/**
 * Buffered binary reader used for parsing STATA (DTA) files.
 *
 * <p>The reader keeps an internal byte buffer that is refilled from the
 * underlying stream on demand, and offers helpers for reading raw bytes,
 * signed/unsigned integers, floating point values, strings, and the
 * {@code <tag>...</tag>} delimited "sections" of the newer DTA formats.
 *
 * <p>Numeric values can only be read once the byte order has been declared
 * with {@link #setLSF(boolean)}. Every failure to read, including reaching
 * the end of the stream prematurely, is reported as an {@link IOException}.
 *
 * <p>Instances are not thread-safe.
 */
public class DataReader {
    // Same package as DTAFileReader, hence the same logger name as before.
    private static final Logger logger = Logger.getLogger(DataReader.class.getPackage().getName());

    /** Buffer capacity used when the caller does not request a specific one. */
    private static final int DEFAULT_BUFFER_SIZE = 8192;
    /** Upper bound on the stack frames appended to the closing-tag error message. */
    private static final int MAX_STACK_FRAMES_IN_MESSAGE = 10;

    private static final java.nio.charset.Charset ASCII = java.nio.charset.StandardCharsets.US_ASCII;
    private static final java.nio.charset.Charset UTF8 = java.nio.charset.StandardCharsets.UTF_8;

    private final BufferedInputStream stream;
    private final byte[] buffer;
    /** Number of valid bytes currently held in {@link #buffer}. */
    private int bufferSize;
    /** Absolute stream offset of {@code buffer[0]}. */
    private long byteOffset;
    /** Index of the next unread byte in {@link #buffer}. */
    private int bufferByteOffset;
    /** TRUE = little endian, FALSE = big endian, null = not yet determined. */
    private Boolean LSF = null;

    /**
     * Creates a reader with the default buffer capacity.
     *
     * @param stream the stream to read from
     * @throws IOException if the initial buffering fails
     */
    public DataReader(BufferedInputStream stream) throws IOException {
        this(stream, 0);
    }

    /**
     * Creates a reader with the requested buffer capacity and pre-loads
     * the first buffer-worth of bytes.
     *
     * @param stream the stream to read from
     * @param size   buffer capacity in bytes; zero or a negative value
     *               selects the default capacity
     * @throws IOException if the initial buffering fails
     */
    public DataReader(BufferedInputStream stream, int size) throws IOException {
        this.stream = stream;
        this.buffer = new byte[size > 0 ? size : DEFAULT_BUFFER_SIZE];
        this.byteOffset = 0;
        this.bufferByteOffset = 0;
        this.bufferSize = 0;

        bufferMoreBytes();
    }

    /**
     * Declares the byte order of the numeric values in the stream.
     *
     * @param lsf {@code true} for little endian (least significant byte
     *            first), {@code false} for big endian
     */
    public void setLSF(boolean lsf) {
        LSF = lsf;
    }

    /**
     * @return the <em>absolute</em> offset, in the stream, of the next byte
     *         that will be returned by this reader
     */
    public long getByteOffset() {
        return byteOffset + bufferByteOffset;
    }

    /**
     * The workhorse method of the reader: returns the next {@code n} bytes,
     * refilling the internal buffer as many times as necessary. The returned
     * array is freshly allocated.
     *
     * @param n number of bytes to read; must be positive
     * @return a new array holding exactly {@code n} bytes
     * @throws IOException if {@code n} is not positive, or if the stream ends
     *                     before {@code n} bytes could be read
     */
    public byte[] readBytes(int n) throws IOException {
        if (n <= 0) {
            throw new IOException("DataReader.readBytes called to read zero or negative number of bytes.");
        }
        byte[] bytes = new byte[n];
        int copied = 0;

        while (copied < n) {
            if (bufferByteOffset >= bufferSize && bufferMoreBytes() < 1) {
                throw new IOException("reached the end of data stream prematurely.");
            }
            int chunk = Math.min(bufferSize - bufferByteOffset, n - copied);
            System.arraycopy(buffer, bufferByteOffset, bytes, copied, chunk);
            bufferByteOffset += chunk;
            copied += chunk;
        }

        return bytes;
    }

    /*
     * Advances the read position by n bytes without allocating an array
     * for the skipped content.
     */
    private void skipBytes(long n) throws IOException {
        long remaining = n;
        while (remaining > 0) {
            if (bufferByteOffset >= bufferSize && bufferMoreBytes() < 1) {
                throw new IOException("reached the end of data stream prematurely.");
            }
            int chunk = (int) Math.min((long) (bufferSize - bufferByteOffset), remaining);
            bufferByteOffset += chunk;
            remaining -= chunk;
        }
    }

    /*
     * Moves any unread bytes to the front of the buffer and tries to fill
     * the rest of it from the stream.
     * Returns whatever the stream's read() returned: the number of bytes
     * added, 0 if the buffer had no free space, or -1 at the end of stream.
     * The buffer size is never decreased by an end-of-stream result.
     */
    private int bufferMoreBytes() throws IOException {
        if (bufferByteOffset > bufferSize) {
            throw new IOException("Offset already past the buffer boundary");
        }

        int unread = bufferSize - bufferByteOffset;
        if (unread > 0 && bufferByteOffset > 0) {
            System.arraycopy(buffer, bufferByteOffset, buffer, 0, unread);
        }
        // the consumed part of the buffer is dropped; account for it
        // in the absolute offset:
        byteOffset += bufferByteOffset;
        bufferByteOffset = 0;
        bufferSize = unread;

        int actualBytesRead = stream.read(buffer, unread, buffer.length - unread);
        if (actualBytesRead > 0) {
            bufferSize += actualBytesRead;
        }
        return actualBytesRead;
    }

    /*
     * Makes sure at least n unread bytes are sitting in the buffer.
     * Returns false if that is impossible - either because the stream
     * has ended, or because n exceeds the capacity of the buffer.
     */
    private boolean ensureBuffered(int n) throws IOException {
        while (bufferSize - bufferByteOffset < n) {
            if (n > buffer.length || bufferMoreBytes() < 1) {
                return false;
            }
        }
        return true;
    }

    /*
     * Returns the declared byte order, or throws if it has not been set yet.
     */
    private ByteOrder byteOrder() throws IOException {
        if (LSF == null) {
            throw new IOException("Byte order not determined for reading numeric values.");
        }
        return LSF ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN;
    }

    /*
     * Reads n bytes and wraps them in a ByteBuffer with the declared byte
     * order. The byte order is checked first, so that nothing is consumed
     * from the stream when it is not yet known.
     */
    private ByteBuffer readOrdered(int n) throws IOException {
        ByteOrder order = byteOrder();
        return ByteBuffer.wrap(readBytes(n)).order(order);
    }

    /**
     * Reads a single signed byte. (Implemented directly on the buffer, to
     * avoid the one-element array {@link #readBytes(int)} would allocate.)
     *
     * @return the next byte of the stream
     * @throws IOException if the stream has no more bytes
     */
    public byte readByte() throws IOException {
        if (bufferByteOffset > bufferSize) {
            throw new IOException("TD - buffer overflow");
        }
        if (bufferByteOffset == bufferSize && bufferMoreBytes() < 1) {
            throw new IOException("reached the end of data stream prematurely.");
        }
        return buffer[bufferByteOffset++];
    }

    /**
     * Reads a single unsigned byte.
     *
     * @return the value of the next byte, in the range 0..255 (returned as
     *         a {@code short} to accommodate values larger than 127)
     * @throws IOException if the stream has no more bytes
     */
    public short readUByte() throws IOException {
        return (short) (readByte() & 0xFF);
    }

    // Unsigned integer methods readUShort() and readUInt() return
    // int (4 byte) and long (8 byte) integers respectively, for
    // overflow reasons.

    /**
     * @return the next 2 bytes as an unsigned integer
     * @throws IOException if the byte order is not set or the stream ends
     */
    public int readUShort() throws IOException {
        return (int) readULong(2);
    }

    /**
     * @return the next 4 bytes as an unsigned integer
     * @throws IOException if the byte order is not set or the stream ends
     */
    public long readUInt() throws IOException {
        return readULong(4);
    }

    /**
     * @return the next 8 bytes as an unsigned integer
     * @throws IOException if the byte order is not set, the stream ends, or
     *                     the value does not fit into a signed {@code long}
     */
    public long readULong() throws IOException {
        return readULong(8);
    }

    /**
     * @return the next 2 bytes as a signed integer
     * @throws IOException if the byte order is not set or the stream ends
     */
    public short readShort() throws IOException {
        return readOrdered(2).getShort();
    }

    /**
     * @return the next 4 bytes as a signed integer
     * @throws IOException if the byte order is not set or the stream ends
     */
    public int readInt() throws IOException {
        return readOrdered(4).getInt();
    }

    /**
     * Reads an unsigned integer of the given width, honoring the declared
     * byte order. The byte order and the width are validated before any
     * bytes are consumed.
     *
     * @param n width of the integer in bytes; must be 2, 4, 6 or 8
     * @return the unsigned value
     * @throws IOException if the byte order is not set, the width is not
     *                     supported, the stream ends prematurely, or the
     *                     value is larger than 2^63-1
     */
    public long readULong(int n) throws IOException {
        boolean littleEndian = byteOrder() == ByteOrder.LITTLE_ENDIAN;

        if (n != 2 && n != 4 && n != 6 && n != 8) {
            throw new IOException("Unsupported number of bytes in an integer: " + n);
        }
        byte[] rawBytes = readBytes(n);

        // Accumulate starting from the most significant byte:
        long ret = 0;
        for (int i = 0; i < n; i++) {
            int index = littleEndian ? n - 1 - i : i;
            ret = (ret << 8) | (rawBytes[index] & 0xFFL);
        }
        if (ret < 0) {
            throw new IOException("Sorry for hoping this wouldn't be used with values over 2^63-1");
        }
        return ret;
    }

    // Floating point reader methods:

    /**
     * @return the next 8 bytes as a double
     * @throws IOException if the byte order is not set or the stream ends
     */
    public double readDouble() throws IOException {
        return readOrdered(8).getDouble();
    }

    /**
     * @return the next 4 bytes as a float
     * @throws IOException if the byte order is not set or the stream ends
     */
    public float readFloat() throws IOException {
        return readOrdered(4).getFloat();
    }

    /*
     * Chops the string at the first zero character, if there is one.
     */
    private static String chopAtNull(String value) {
        int end = value.indexOf(0);
        return end < 0 ? value : value.substring(0, end);
    }

    /**
     * Reads {@code n} bytes as a NULL-terminated US-ASCII string; i.e. the
     * result is chopped at the first zero byte encountered.
     *
     * @param n number of bytes occupied by the string in the stream
     * @return the decoded string, without the terminating/padding zeros
     * @throws IOException if {@code n} bytes cannot be read
     */
    public String readString(int n) throws IOException {
        return chopAtNull(new String(readBytes(n), ASCII));
    }

    /**
     * Same as {@link #readString(int)}, but decodes the bytes as UTF-8.
     *
     * @param n number of bytes occupied by the string in the stream
     * @return the decoded string, without the terminating/padding zeros
     * @throws IOException if {@code n} bytes cannot be read
     */
    public String readUtfString(int n) throws IOException {
        return chopAtNull(new String(readBytes(n), UTF8));
    }

    /*
     * More complex helper methods for reading NewDTA "sections" ...
     */

    /**
     * Reads a {@code <tag>...</tag>} section of unknown length: everything
     * between the opening tag and the next {@code '<'} character.
     *
     * @param tag section name, without the angle brackets
     * @return the content of the section, or {@code null} if it is empty
     * @throws IOException if either tag is missing or the stream ends
     */
    public byte[] readPrimitiveSection(String tag) throws IOException {
        readOpeningTag(tag);
        byte[] ret = readPrimitiveSectionBytes();
        readClosingTag(tag);
        return ret;
    }

    /**
     * Reads a {@code <tag>...</tag>} section with exactly {@code length}
     * bytes of content.
     *
     * @param tag    section name, without the angle brackets
     * @param length number of content bytes; must be positive
     * @return the content of the section
     * @throws IOException if either tag is missing or the stream ends
     */
    public byte[] readPrimitiveSection(String tag, int length) throws IOException {
        readOpeningTag(tag);
        byte[] ret = readBytes(length);
        readClosingTag(tag);
        return ret;
    }

    /**
     * Like {@link #readPrimitiveSection(String)}, decoded as US-ASCII.
     *
     * @param tag section name, without the angle brackets
     * @return the content of the section, or {@code null} if it is empty
     * @throws IOException if either tag is missing or the stream ends
     */
    public String readPrimitiveStringSection(String tag) throws IOException {
        byte[] content = readPrimitiveSection(tag);
        // an empty section yields no bytes at all
        return content == null ? null : new String(content, ASCII);
    }

    /**
     * Like {@link #readPrimitiveSection(String, int)}, decoded as US-ASCII.
     *
     * @param tag    section name, without the angle brackets
     * @param length number of content bytes; must be positive
     * @return the content of the section
     * @throws IOException if either tag is missing or the stream ends
     */
    public String readPrimitiveStringSection(String tag, int length) throws IOException {
        return new String(readPrimitiveSection(tag, length), ASCII);
    }

    /**
     * Reads a label section: {@code <tag>ll...</tag>}, where {@code ll} is
     * the byte length of the UTF-8 encoded label that follows, recorded as
     * a 2-byte unsigned integer in the declared byte order.
     *
     * @param tag   section name, without the angle brackets
     * @param limit maximum expected length; NOTE: currently not enforced
     * @return the label, or {@code null} if its recorded length is zero
     * @throws IOException if either tag is missing, the byte order is not
     *                     set, or the stream ends
     */
    public String readLabelSection(String tag, int limit) throws IOException {
        readOpeningTag(tag);
        int lengthOfLabel = readUShort();
        logger.fine("length of label: " + lengthOfLabel);
        String label = null;
        if (lengthOfLabel > 0) {
            label = new String(readBytes(lengthOfLabel), UTF8);
        }
        logger.fine("ret: " + label);
        readClosingTag(tag);
        return label;
    }

    /**
     * Reads a string section the length of which is <em>defined</em>:
     * {@code <tag>Lxxxxxx...x</tag>}, where L is a single unsigned byte
     * specifying the length of the enclosed string, followed by L bytes.
     * L must satisfy {@code 0 <= L <= limit} (for example, the "dataset
     * label" is limited to 80 characters).
     *
     * @param tag   section name, without the angle brackets
     * @param limit maximum allowed length of the string
     * @return the US-ASCII decoded string, or {@code null} if L is zero
     * @throws IOException if either tag is missing, L exceeds the limit, or
     *                     the stream ends
     */
    public String readDefinedStringSection(String tag, int limit) throws IOException {
        readOpeningTag(tag);
        short number = readUByte();
        logger.fine("number: " + number);
        if (number < 0 || number > limit) {
            throw new IOException("more than " + limit + " characters in the section \"" + tag + "\": " + number);
        }
        String ret = null;
        if (number > 0) {
            ret = new String(readBytes(number), ASCII);
        }
        logger.fine("ret: " + ret);
        readClosingTag(tag);
        return ret;
    }

    /**
     * Reads a section that holds a single unsigned integer.
     *
     * @param tag section name, without the angle brackets
     * @param n   width of the integer in bytes (see {@link #readULong(int)})
     * @return the unsigned value
     * @throws IOException if either tag is missing or the value cannot be read
     */
    public long readIntegerSection(String tag, int n) throws IOException {
        readOpeningTag(tag);
        long number = readULong(n);
        readClosingTag(tag);
        return number;
    }

    /**
     * Skips consecutive {@code <tag>llll...</tag>} sections (such as the
     * {@code <ch>} sections inside the "characteristics" section), where
     * llll is a 4-byte unsigned integer followed by llll bytes. Stops at
     * the first position where the opening tag is not found.
     *
     * @param tag section name, without the angle brackets
     * @throws IOException if a section is malformed or the stream ends
     */
    public void skipDefinedSections(String tag) throws IOException {
        logger.fine("entering at offset " + bufferByteOffset);
        while (checkTag("<" + tag + ">")) {
            logger.fine("tag " + tag + " encountered at offset " + bufferByteOffset);
            readOpeningTag(tag);
            long number = readULong(4);
            logger.fine(number + " bytes in this section;");
            // the content is not needed; zero-length sections are legal here
            skipBytes(number);
            readClosingTag(tag);
            logger.fine("read closing tag </" + tag + ">;");
        }
        logger.fine("exiting at offset " + bufferByteOffset);
    }

    /**
     * Checks, without consuming anything, whether the upcoming bytes of the
     * stream spell the supplied tag.
     *
     * @param tag the full tag to look for, angle brackets included
     * @return {@code true} if the next bytes match the tag
     * @throws IOException if the tag is null or empty, or if fewer bytes than
     *                     the length of the tag can be buffered
     */
    public boolean checkTag(String tag) throws IOException {
        if (tag == null || tag.equals("")) {
            throw new IOException("opening tag must be a non-empty string.");
        }

        int n = tag.length();
        if (!ensureBuffered(n)) {
            throw new IOException("Could not buffer enough bytes to check for the tag " + tag);
        }
        return tag.equals(new String(buffer, bufferByteOffset, n, ASCII));
    }

    /**
     * Consumes the opening tag {@code <tag>}.
     *
     * @param tag section name, without the angle brackets
     * @throws IOException if the tag is null or empty, or if the upcoming
     *                     bytes are not the expected opening tag
     */
    public void readOpeningTag(String tag) throws IOException {
        if (tag == null || tag.equals("")) {
            throw new IOException("opening tag must be a non-empty string.");
        }

        String openTagString = new String(readBytes(tag.length() + 2), ASCII);
        if (!openTagString.equals("<" + tag + ">")) {
            throw new IOException("Could not read opening tag <" + tag + ">");
        }
    }

    /**
     * Consumes the closing tag {@code </tag>}.
     *
     * @param tag section name, without the angle brackets
     * @throws IOException if the tag is null or empty, or if the upcoming
     *                     bytes are not the expected closing tag; in the
     *                     latter case the message carries the top frames of
     *                     the current stack trace
     */
    public void readClosingTag(String tag) throws IOException {
        if (tag == null || tag.equals("")) {
            throw new IOException("closing tag must be a non-empty string.");
        }

        String closeTagString = new String(readBytes(tag.length() + 3), ASCII);
        logger.fine("closeTagString: " + closeTagString);

        if (!closeTagString.equals("</" + tag + ">")) {
            StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
            // the stack may well be shallower than the number of frames we report
            int frames = Math.min(MAX_STACK_FRAMES_IN_MESSAGE, stackTrace.length);
            StringBuilder msg = new StringBuilder();
            for (int i = 0; i < frames; i++) {
                msg.append(stackTrace[i]).append("\n");
            }
            throw new IOException("Could not read closing tag </" + tag + ">: " + msg);
        }
    }

    /*
     * Collects all the bytes up to (not including) the next '<' character,
     * refilling the buffer as necessary. The '<' itself is left unread.
     * Returns null if the very next byte is already a '<'.
     */
    private byte[] readPrimitiveSectionBytes() throws IOException {
        byte[] collected = null;

        while (true) {
            if (bufferByteOffset >= bufferSize && bufferMoreBytes() < 1) {
                throw new IOException("reached the end of data stream prematurely.");
            }

            int start = bufferByteOffset;
            while (bufferByteOffset < bufferSize && buffer[bufferByteOffset] != '<') {
                bufferByteOffset++;
            }
            collected = appendFromBuffer(collected, start, bufferByteOffset);

            if (bufferByteOffset < bufferSize) {
                // stopped on a '<'
                return collected;
            }
            logger.fine("reached the end of buffer in readPrimitiveSectionBytes; offset " + bufferByteOffset);
        }
    }

    /*
     * Returns the bytes in "head" (may be null) followed by the bytes
     * buffer[from..to). If that range is empty, "head" is returned as is.
     */
    private byte[] appendFromBuffer(byte[] head, int from, int to) {
        int extra = to - from;
        if (extra == 0) {
            return head;
        }
        if (head == null) {
            return Arrays.copyOfRange(buffer, from, to);
        }
        byte[] merged = Arrays.copyOf(head, head.length + extra);
        System.arraycopy(buffer, from, merged, head.length, extra);
        return merged;
    }

}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc