#22987

Committed 23 Aug 2024 06:44PM CUT coverage: 20.61% (-0.2%) from 20.791%

Build #22987

Build Type

Pull #10781

github

Committed by

landreev

Commit Message

added an upfront locks check to the /addGlobusFiles api #10623

Pull Request #10781: Improved handling of Globus uploads

Run Details

4 of 417 new or added lines in 15 files covered. (0.96%)

4194 existing lines in 35 files now uncovered.

17388 of 84365 relevant lines covered (20.61%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.48

/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DataReader.java

package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.logging.Logger;

/**
 * Buffered, byte-order aware reader used to pull primitive values, strings
 * and tagged ("&lt;tag&gt;...&lt;/tag&gt;") sections out of a binary stream.
 *
 * <p>The reader keeps a fixed-capacity window ({@code buffer}) over the
 * stream. {@code buffer_size} is the number of valid bytes currently in the
 * window (never negative), {@code buffer_byte_offset} is the read position
 * inside the window, and {@code byte_offset} is the absolute stream position
 * of {@code buffer[0]}.
 *
 * <p>The byte order for multi-byte numeric values must be announced with
 * {@link #setLSF(boolean)} before any numeric read; until then those reads
 * throw an {@link IOException}.
 *
 * <p>Instances are not synchronized.
 */
public class DataReader {
    // The logger is named after this package. It is obtained from this class
    // rather than from a sibling class in the same package, which yields the
    // same name without coupling the two classes.
    private static final Logger logger = Logger.getLogger(DataReader.class.getPackage().getName());

    /** Buffer capacity used when the caller does not ask for a specific one. */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Upper bound on the stack frames appended to the closing-tag error message. */
    private static final int MAX_REPORTED_FRAMES = 10;

    private final BufferedInputStream stream;
    private final byte[] buffer;
    private int buffer_size;
    private long byte_offset;
    private int buffer_byte_offset;
    // null until the byte order is known; true means least-significant byte first.
    private Boolean LSF = null;

    /**
     * Creates a reader with the default buffer capacity and buffers the first
     * chunk of the stream.
     *
     * @param stream the stream to read from
     * @throws IOException if the initial read fails
     */
    public DataReader(BufferedInputStream stream) throws IOException {
        this(stream, 0);
    }

    /**
     * Creates a reader and buffers the first chunk of the stream.
     *
     * @param stream the stream to read from
     * @param size   requested buffer capacity in bytes; values {@code <= 0}
     *               select the default capacity
     * @throws IOException if the initial read fails
     */
    public DataReader(BufferedInputStream stream, int size) throws IOException {
        this.stream = stream;
        // The requested size is what must be tested here; the current fill
        // level is always zero at construction time.
        this.buffer = new byte[size > 0 ? size : DEFAULT_BUFFER_SIZE];
        this.buffer_size = 0;
        this.byte_offset = 0;
        this.buffer_byte_offset = 0;

        bufferMoreBytes();
    }

    /**
     * Announces the byte order of numeric values in the stream.
     *
     * @param lsf {@code true} for little-endian (least significant byte
     *            first), {@code false} for big-endian
     */
    public void setLSF(boolean lsf) {
        LSF = lsf;
    }

    /**
     * Returns the <em>absolute</em> byte offset of the next unread byte in the
     * stream.
     */
    public long getByteOffset() {
        return byte_offset + buffer_byte_offset;
    }

    /**
     * Reads exactly {@code n} bytes, refilling the internal buffer as often as
     * necessary. The returned array is freshly allocated.
     *
     * @param n number of bytes to read; must be positive
     * @return a new array holding the next {@code n} bytes of the stream
     * @throws IOException if {@code n} is not positive, or if the stream ends
     *                     before {@code n} bytes could be read
     */
    public byte[] readBytes(int n) throws IOException {
        if (n <= 0) {
            throw new IOException("DataReader.readBytes called to read zero or negative number of bytes.");
        }
        byte[] bytes = new byte[n];
        int copied = 0;

        while (copied < n) {
            int available = buffer_size - buffer_byte_offset;
            if (available <= 0) {
                // Window exhausted: refill, and fail loudly on end of stream
                // instead of handing back stale buffer contents.
                if (bufferMoreBytes() < 1) {
                    throw new IOException("reached the end of data stream prematurely: "
                            + copied + " of " + n + " requested bytes were available.");
                }
                continue;
            }
            int chunk = Math.min(available, n - copied);
            System.arraycopy(buffer, buffer_byte_offset, bytes, copied, chunk);
            buffer_byte_offset += chunk;
            copied += chunk;
        }

        return bytes;
    }

    /**
     * Advances the read position by {@code count} bytes without allocating an
     * array for the skipped data.
     *
     * @param count number of bytes to skip; zero is a no-op
     * @throws IOException if the stream ends before {@code count} bytes were skipped
     */
    private void skipBytes(long count) throws IOException {
        long remaining = count;
        while (remaining > 0) {
            int available = buffer_size - buffer_byte_offset;
            if (available <= 0) {
                if (bufferMoreBytes() < 1) {
                    throw new IOException("reached the end of data stream prematurely while skipping "
                            + count + " bytes.");
                }
                continue;
            }
            int chunk = (int) Math.min((long) available, remaining);
            buffer_byte_offset += chunk;
            remaining -= chunk;
        }
    }

    /**
     * Slides any unread bytes to the front of the buffer and tries to fill the
     * rest of the buffer from the stream.
     *
     * @return the value returned by the underlying {@code read}: the number of
     *         bytes added, {@code -1} at end of stream, or {@code 0} when the
     *         buffer was already full of unread bytes
     * @throws IOException if the read position is past the buffered data or
     *                     the underlying read fails
     */
    private int bufferMoreBytes() throws IOException {
        if (buffer_byte_offset > buffer_size) {
            throw new IOException("Offset already past the buffer boundary");
        }

        // buffer[0] is about to correspond to the current read position.
        byte_offset += buffer_byte_offset;

        int unread = buffer_size - buffer_byte_offset;
        if (unread > 0 && buffer_byte_offset > 0) {
            System.arraycopy(buffer, buffer_byte_offset, buffer, 0, unread);
        }
        buffer_size = unread;
        buffer_byte_offset = 0;

        int actual_bytes_read = stream.read(buffer, buffer_size, buffer.length - buffer_size);
        // Only a positive count grows the window; the -1 end-of-stream marker
        // must never leak into buffer_size.
        if (actual_bytes_read > 0) {
            buffer_size += actual_bytes_read;
        }

        return actual_bytes_read;
    }

    /**
     * Checks that the byte order is known and applies it to {@code buffer}.
     * A {@link ByteBuffer} is big-endian by default, so only the little-endian
     * case needs an explicit call.
     */
    private void checkLSF(ByteBuffer buffer) throws IOException {
        if (LSF == null) {
            throw new IOException("Byte order not determined for reading numeric values.");
        } else if (LSF) {
            buffer.order(ByteOrder.LITTLE_ENDIAN);
        }
    }

    /** Reads {@code n} bytes and wraps them in a buffer set to the announced byte order. */
    private ByteBuffer readOrdered(int n) throws IOException {
        ByteBuffer byte_buffer = ByteBuffer.wrap(readBytes(n));
        checkLSF(byte_buffer);
        return byte_buffer;
    }

    /**
     * Reads one signed byte. This does not go through {@link #readBytes(int)}
     * so that no one-element array has to be allocated.
     *
     * @throws IOException if the stream has ended
     */
    public byte readByte() throws IOException {
        if (buffer_byte_offset > this.buffer_size) {
            throw new IOException("TD - buffer overflow");
        }
        if (buffer_byte_offset == this.buffer_size && bufferMoreBytes() < 1) {
            throw new IOException("reached the end of data stream prematurely.");
        }
        return buffer[buffer_byte_offset++];
    }

    /**
     * Reads one unsigned byte. The result is a {@code short} so that values
     * above 127 can be represented.
     */
    public short readUByte() throws IOException {
        return (short) (readByte() & 0xFF);
    }

    /*
     * Reader methods for primitive numeric types. The unsigned variants return
     * the next wider Java type (int for 2 bytes, long for 4 bytes) so the full
     * unsigned range fits. All of them throw an IOException if the byte order
     * has not been set yet.
     */

    /** Reads a 2-byte unsigned integer. */
    public int readUShort() throws IOException {
        return (int) readULong(2);
    }

    /** Reads a 4-byte unsigned integer. */
    public long readUInt() throws IOException {
        return readULong(4);
    }

    /** Reads an 8-byte unsigned integer; values above 2^63-1 are rejected. */
    public long readULong() throws IOException {
        return readULong(8);
    }

    /** Reads a 2-byte signed integer. */
    public short readShort() throws IOException {
        return readOrdered(2).getShort();
    }

    /** Reads a 4-byte signed integer. */
    public int readInt() throws IOException {
        return readOrdered(4).getInt();
    }

    /**
     * Reads an unsigned integer stored in {@code n} bytes.
     *
     * @param n width of the integer in bytes; must be 2, 4, 6 or 8
     * @return the value as a non-negative {@code long}
     * @throws IOException if {@code n} is unsupported, the byte order is not
     *                     set, the stream ends early, or the value does not
     *                     fit into a signed {@code long}
     */
    public long readULong(int n) throws IOException {
        // Arguments and state are validated before any bytes are consumed.
        if (n != 2 && n != 4 && n != 6 && n != 8) {
            throw new IOException("Unsupported number of bytes in an integer: " + n);
        }
        if (LSF == null) {
            throw new IOException("Byte order not determined for reading numeric values.");
        }
        boolean littleEndian = LSF;
        byte[] raw_bytes = readBytes(n);

        long ret = 0;
        for (int i = 0; i < n; i++) {
            // i counts from the least significant byte upwards.
            byte current = littleEndian ? raw_bytes[i] : raw_bytes[n - i - 1];
            ret |= (current & 0xFFL) << (8 * i);
        }
        if (ret < 0) {
            throw new IOException("Sorry for hoping this wouldn't be used with values over 2^63-1");
        }
        return ret;
    }

    // Floating point reader methods:

    /** Reads an 8-byte floating point value. */
    public double readDouble() throws IOException {
        return readOrdered(8).getDouble();
    }

    /** Reads a 4-byte floating point value. */
    public float readFloat() throws IOException {
        return readOrdered(4).getFloat();
    }

    /** Cuts {@code value} at the first NUL character, if there is one. */
    private static String truncateAtNul(String value) {
        int nul = value.indexOf(0);
        return nul > -1 ? value.substring(0, nul) : value;
    }

    /**
     * Reads {@code n} bytes as a NUL-terminated (or NUL-padded) US-ASCII
     * string; everything from the first zero byte on is dropped.
     */
    public String readString(int n) throws IOException {
        return truncateAtNul(new String(readBytes(n), StandardCharsets.US_ASCII));
    }

    /**
     * Same as {@link #readString(int)}, but decodes the bytes as UTF-8.
     */
    public String readUtfString(int n) throws IOException {
        return truncateAtNul(new String(readBytes(n), StandardCharsets.UTF_8));
    }

    /*
     * More complex helper methods for reading tagged "sections" ...
     */

    /**
     * Reads {@code <tag>...</tag>} where the content extends up to the next
     * {@code '<'} byte.
     *
     * @return the content bytes, or {@code null} when the section is empty
     * @throws IOException if either tag does not match or the stream ends early
     */
    public byte[] readPrimitiveSection(String tag) throws IOException {
        readOpeningTag(tag);
        byte[] ret = readPrimitiveSectionBytes();
        readClosingTag(tag);
        return ret;
    }

    /**
     * Reads {@code <tag>...</tag>} where the content is exactly
     * {@code length} bytes long.
     *
     * @param length content length in bytes; must be positive
     */
    public byte[] readPrimitiveSection(String tag, int length) throws IOException {
        readOpeningTag(tag);
        byte[] ret = readBytes(length);
        readClosingTag(tag);
        return ret;
    }

    /**
     * Reads a delimiter-terminated section as a US-ASCII string.
     *
     * @return the content, or an empty string for an empty section
     */
    public String readPrimitiveStringSection(String tag) throws IOException {
        byte[] content = readPrimitiveSection(tag);
        // An empty section is reported as null bytes; map it to "" rather
        // than failing with a NullPointerException.
        return content == null ? "" : new String(content, StandardCharsets.US_ASCII);
    }

    /** Reads a fixed-length section as a US-ASCII string. */
    public String readPrimitiveStringSection(String tag, int length) throws IOException {
        return new String(readPrimitiveSection(tag, length), StandardCharsets.US_ASCII);
    }

    /**
     * Reads {@code <tag>LLxxx...</tag>} where LL is a 2-byte unsigned length
     * (in the announced byte order) followed by that many bytes.
     *
     * @param limit currently not enforced
     * @return the label, or {@code null} when the recorded length is zero
     */
    public String readLabelSection(String tag, int limit) throws IOException {
        // NOTE(review): the original comment on this method describes the
        // content as UTF-8, yet the bytes are decoded as US-ASCII, and "limit"
        // is never checked. Both are kept as they were - confirm against the
        // callers before changing either.
        readOpeningTag(tag);
        int lengthOfLabel = readUShort();
        logger.fine("length of label: " + lengthOfLabel);
        String label = null;
        if (lengthOfLabel > 0) {
            label = new String(readBytes(lengthOfLabel), StandardCharsets.US_ASCII);
        }
        logger.fine("ret: " + label);
        readClosingTag(tag);
        return label;
    }

    /**
     * Reads a string section whose length is stored in the section itself:
     * {@code <tag>Lxxxxxx...x</tag>}, where L is a single unsigned byte giving
     * the number of bytes that follow. L must satisfy {@code 0 <= L <= limit}.
     *
     * @return the string, or {@code null} when L is zero
     * @throws IOException if L exceeds {@code limit}, a tag does not match, or
     *                     the stream ends early
     */
    public String readDefinedStringSection(String tag, int limit) throws IOException {
        readOpeningTag(tag);
        short number = readUByte();
        logger.fine("number: " + number);
        if (number > limit) {
            throw new IOException("more than " + limit + " characters (" + number
                    + ") in the section \"" + tag + "\"");
        }
        String ret = null;
        if (number > 0) {
            ret = new String(readBytes(number), StandardCharsets.US_ASCII);
        }
        logger.fine("ret: " + ret);
        readClosingTag(tag);
        return ret;
    }

    /**
     * Reads {@code <tag>N</tag>} where N is an unsigned integer of {@code n}
     * bytes (see {@link #readULong(int)}).
     */
    public long readIntegerSection(String tag, int n) throws IOException {
        readOpeningTag(tag);
        long number = readULong(n);
        readClosingTag(tag);
        return number;
    }

    /**
     * Skips consecutive {@code <tag>llll...</tag>} sections, where llll is a
     * 4-byte unsigned length followed by that many bytes. Stops at the first
     * position that does not start with {@code <tag>}, including end of stream.
     */
    public void skipDefinedSections(String tag) throws IOException {
        logger.fine("entering at offset " + buffer_byte_offset);
        while (checkTag("<" + tag + ">")) {
            logger.fine("tag " + tag + " encountered at offset " + buffer_byte_offset);
            readOpeningTag(tag);
            long number = readULong(4);
            logger.fine(number + " bytes in this section;");
            // Skipping in place copes with zero-length sections and with
            // lengths that do not fit into an int.
            skipBytes(number);
            readClosingTag(tag);
            logger.fine("read closing tag </" + tag + ">;");
        }
        logger.fine("exiting at offset " + buffer_byte_offset);
    }

    /**
     * Tests, without consuming anything, whether the next bytes of the stream
     * spell out {@code tag}.
     *
     * @param tag the literal text to look for, e.g. {@code "<ch>"}
     * @return {@code true} if the upcoming bytes equal {@code tag};
     *         {@code false} otherwise, including when fewer bytes than the
     *         tag's length remain in the stream
     * @throws IOException if {@code tag} is null or empty, or longer than the
     *                     internal buffer
     */
    public boolean checkTag(String tag) throws IOException {
        if (tag == null || tag.equals("")) {
            throw new IOException("opening tag must be a non-empty string.");
        }

        int n = tag.length();
        if (n > buffer.length) {
            throw new IOException("tag of " + n + " characters does not fit into the "
                    + buffer.length + "-byte read buffer.");
        }
        while (this.buffer_size - buffer_byte_offset < n) {
            if (bufferMoreBytes() < 1) {
                // End of stream: the tag cannot be there.
                return false;
            }
        }
        return tag.equals(new String(buffer, buffer_byte_offset, n, StandardCharsets.US_ASCII));
    }

    /**
     * Consumes {@code <tag>} from the stream.
     *
     * @throws IOException if {@code tag} is null or empty, or the next bytes
     *                     are not {@code <tag>}
     */
    public void readOpeningTag(String tag) throws IOException {
        if (tag == null || tag.equals("")) {
            throw new IOException("opening tag must be a non-empty string.");
        }

        String openTagString = new String(readBytes(tag.length() + 2), StandardCharsets.US_ASCII);
        if (!openTagString.equals("<" + tag + ">")) {
            throw new IOException("Could not read opening tag <" + tag + ">");
        }
    }

    /**
     * Consumes {@code </tag>} from the stream.
     *
     * @throws IOException if {@code tag} is null or empty, or the next bytes
     *                     are not {@code </tag>}; in the latter case the
     *                     message carries the top frames of the current stack
     */
    public void readClosingTag(String tag) throws IOException {
        if (tag == null || tag.equals("")) {
            throw new IOException("closing tag must be a non-empty string.");
        }

        String closeTagString = new String(readBytes(tag.length() + 3), StandardCharsets.US_ASCII);
        logger.fine("closeTagString: " + closeTagString);

        if (!closeTagString.equals("</" + tag + ">")) {
            StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
            // The stack may be shallower than the number of frames we report.
            int frames = Math.min(MAX_REPORTED_FRAMES, stackTrace.length);
            StringBuilder msg = new StringBuilder();
            for (int i = 0; i < frames; i++) {
                msg.append(stackTrace[i].toString()).append("\n");
            }
            throw new IOException("Could not read closing tag </" + tag + ">: " + msg);
        }
    }

    /**
     * Collects bytes up to, but not including, the next {@code '<'} byte,
     * refilling the buffer as needed. The {@code '<'} itself stays unread.
     *
     * @return the collected bytes, or {@code null} if the very next byte is
     *         {@code '<'} (empty section)
     * @throws IOException if the stream ends before a {@code '<'} is found
     */
    private byte[] readPrimitiveSectionBytes() throws IOException {
        if (buffer_byte_offset > this.buffer_size) {
            throw new IOException("Buffer overflow in DataReader.");
        }

        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        boolean delimiterFound = false;

        while (!delimiterFound) {
            if (buffer_byte_offset == this.buffer_size && bufferMoreBytes() < 1) {
                throw new IOException("reached the end of data stream prematurely "
                        + "while looking for the end of a section.");
            }

            int start = buffer_byte_offset;
            while (buffer_byte_offset < this.buffer_size && buffer[buffer_byte_offset] != '<') {
                buffer_byte_offset++;
            }
            collected.write(buffer, start, buffer_byte_offset - start);

            // Stopping short of the end of the window means we hit '<'.
            delimiterFound = buffer_byte_offset < this.buffer_size;
            if (!delimiterFound) {
                logger.fine("reached the end of buffer in readPrimitiveSectionBytes; offset " + buffer_byte_offset);
            }
        }

        return collected.size() == 0 ? null : collected.toByteArray();
    }

}

1	package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta;
2
3	import java.io.BufferedInputStream;
4	import java.io.IOException;
5	import java.nio.ByteBuffer;
6	import java.nio.ByteOrder;
7	import java.util.Arrays;
8	import java.util.logging.Logger;
9
10	public class DataReader {
11	private static Logger logger = Logger.getLogger(DTAFileReader.class.getPackage().getName());	1✔
12	private BufferedInputStream stream;
13	private int DEFAULT_BUFFER_SIZE = 8192;// * 2;	1✔
14	private byte[] buffer;
15	private int buffer_size;
16	private long byte_offset;
17	private int buffer_byte_offset;
18	private Boolean LSF = null;	1✔
19
20	public DataReader(BufferedInputStream stream) throws IOException {
21	this(stream, 0);	1✔
22	}	1✔
23
24	public DataReader(BufferedInputStream stream, int size) throws IOException {	1✔
25	if (buffer_size > 0) {	1✔
UNCOV 26	this.DEFAULT_BUFFER_SIZE = size;	×
27	}
28	this.stream = stream;	1✔
29	buffer = new byte[DEFAULT_BUFFER_SIZE];	1✔
30	byte_offset = 0;	1✔
31	buffer_byte_offset = 0;	1✔
32
33	bufferMoreBytes();	1✔
34	}	1✔
35
36	public void setLSF(boolean lsf) {
37	LSF = lsf;	1✔
38	}	1✔
39
40	// this returns the absolute byte offest in the stream.
41	public long getByteOffset() {
42	return byte_offset + buffer_byte_offset;	1✔
43	}
44
45	/*
46	readBytes is the workhorse method of the internal Data Reader class.
47	it reads the requested number of bytes from the buffer, if available,
48	refilling the buffer as necessary.
49	the method allocates the byte array it returns, so there's no need
50	to do so outside of it.
51	the method will throw an exception if for whatever reason it cannot
52	read the requested number of bytes.
53	*/
54	public byte[] readBytes(int n) throws IOException {
55	if (n <= 0) {	1✔
UNCOV 56	throw new IOException("DataReader.readBytes called to read zero or negative number of bytes.");	×
57	}
58	byte[] bytes = new byte[n];	1✔
59
60	if (this.buffer_size - buffer_byte_offset >= n) {	1✔
61	System.arraycopy(buffer, buffer_byte_offset, bytes, 0, n);	1✔
62	buffer_byte_offset += n;	1✔
63	} else {
UNCOV 64	int bytes_read = 0;	×
65
66	// copy any bytes left in the buffer into the return array:
UNCOV 67	if (this.buffer_size - buffer_byte_offset > 0) {	×
68	logger.fine("reading the remaining " + (this.buffer_size - buffer_byte_offset) + " bytes from the buffer");	×
69	System.arraycopy(buffer, buffer_byte_offset, bytes, 0, this.buffer_size - buffer_byte_offset);	×
70	//buffer_byte_offset = this.buffer_size;
UNCOV 71	bytes_read = this.buffer_size - buffer_byte_offset;	×
72	buffer_byte_offset = this.buffer_size;	×
73	}
74
UNCOV 75	int morebytes = bufferMoreBytes();	×
76	logger.fine("buffered " + morebytes + " bytes");	×
77
78	/*
79	* keep reading and buffering buffer-size chunks, until
80	* we read the requested number of bytes.
81	*/
UNCOV 82	while (n - bytes_read > this.buffer_size) {	×
83	logger.fine("copying a full buffer-worth of bytes into the return array");	×
84	System.arraycopy(buffer, buffer_byte_offset, bytes, bytes_read, this.buffer_size);	×
85	//buffer_byte_offset = this.buffer_size;
UNCOV 86	bytes_read += this.buffer_size;	×
87	buffer_byte_offset = this.buffer_size;	×
88	morebytes = bufferMoreBytes();	×
89	logger.fine("buffered "+morebytes+" bytes");	×
90	}
91
92	/*
93	* finally, copy the last not-a-full-buffer-worth of bytes
94	* into the return buffer:
95	*/
UNCOV 96	logger.fine("copying the remaining " + (n - bytes_read) + " bytes.");	×
97	System.arraycopy(buffer, 0, bytes, bytes_read, n - bytes_read);	×
98	buffer_byte_offset = n - bytes_read;	×
99	}
100
101	return bytes;	1✔
102	}
103
104	/*
105	* This method tries to read and buffer the DEFAULT_BUFFER_SIZE bytes
106	* and sets the current buffer size accordingly.
107	*/
108	private int bufferMoreBytes() throws IOException {
109	int actual_bytes_read;
110	byte_offset += buffer_byte_offset;	1✔
111
112	if (byte_offset == 0 \|\| buffer_byte_offset == buffer_size) {	1✔
113	actual_bytes_read = stream.read(buffer, 0, DEFAULT_BUFFER_SIZE);	1✔
114	// set the current buffer size to the actual number of
115	// bytes read:
116	this.buffer_size = actual_bytes_read;	1✔
117
118	// reset the current buffer offset and increment the total
119	// byte offset by the size of the last buffer - that should be
120	// equal to the buffer_byte_offset.
121
UNCOV 122	} else if (buffer_byte_offset < buffer_size) {	×
123	System.arraycopy(buffer, buffer_byte_offset, buffer, 0, buffer_size - buffer_byte_offset);	×
124	this.buffer_size = buffer_size - buffer_byte_offset;	×
125	actual_bytes_read = stream.read(buffer, buffer_size, DEFAULT_BUFFER_SIZE - buffer_size);	×
126	buffer_size += actual_bytes_read;	×
127
128	} else {
UNCOV 129	throw new IOException("Offset already past the buffer boundary");	×
130	}
131	buffer_byte_offset = 0;	1✔
132
133	return actual_bytes_read;	1✔
134	}
135
136	/*
137	* Checks that LSF is not null, and sets the buffer byte order accordingly
138	*/
139	private void checkLSF(ByteBuffer buffer) throws IOException{
140	if (LSF == null) {	1✔
UNCOV 141	throw new IOException("Byte order not determined for reading numeric values.");	×
142	} else if (LSF) {	1✔
143	buffer.order(ByteOrder.LITTLE_ENDIAN);	1✔
144	}
145	}	1✔
146
147	/*
148	* Convenience methods for reading single bytes of data.
149	* Just like with the other types of integers, both the signed and
150	* unsigned versions are provided.
151	* The readByte() is used to read STATA data stored as
152	* type "Byte"; the unsigned version is used to read byte values
153	* in various sections of the file that store the lengths of byte
154	* sequences that follow.
155	*/
156	public byte readByte() throws IOException {
157	/* Why not just use readBytes(1) here, you ask?
158	* - Because readBytes() will want to allocate a
159	* return byte[] buffer of size 1. */
160	byte ret;
161	if (buffer_byte_offset > this.buffer_size) {	1✔
UNCOV 162	throw new IOException("TD - buffer overflow");	×
163	} else if (buffer_byte_offset < this.buffer_size) {	1✔
164	ret = buffer[buffer_byte_offset];	1✔
165	buffer_byte_offset++;	1✔
166	} else {
UNCOV 167	if (bufferMoreBytes() < 1) {	×
168	throw new IOException("reached the end of data stream prematurely.");	×
169	}
UNCOV 170	ret = buffer[0];	×
171	buffer_byte_offset = 1;	×
172	}
173	return ret;	1✔
174	}
175
176	// Note that readUByte() returns the value of Java type "short".
177	// This is to accommodate value larger than 127.
178	public short readUByte() throws IOException {
179	short ret = readByte();	1✔
180	if (ret < 0) {	1✔
181	ret += 256;	1✔
182	}
183	return ret;	1✔
184	}
185
186	/* Various reader methods for reading primitive numeric types;
187	* these are used both for reading the values from the data section
188	* (signed integer and floating-point types), and to read numeric
189	* values encoded as unsigned bytes in various sections of the file,
190	* advertising the lengths of the data sections that follow.
191	* Note that the internal methods bytesToInt() and bytesToSignedInt()
192	* will throw an exception if LSF (byte order flag) has not yet been
193	* set.
194	*/
195	// Unsigned integer methods readUInt() and readUShort()
196	// return long (8 byte) and int (4 byte) integers for overflow reasons
197	public int readUShort() throws IOException {
198	return (int) readULong(2);	1✔
199	}
200
201	public long readUInt() throws IOException {
202	return readULong(4);	1✔
203	}
204
205	public long readULong() throws IOException {
206	return readULong(8);	1✔
207	}
208
209	public short readShort() throws IOException {
210	ByteBuffer byte_buffer = ByteBuffer.wrap(readBytes(2));	1✔
211	checkLSF(byte_buffer);	1✔
212	return byte_buffer.getShort();	1✔
213	}
214
215	public int readInt() throws IOException {
216	ByteBuffer byte_buffer = ByteBuffer.wrap(readBytes(4));	1✔
217	checkLSF(byte_buffer);	1✔
218	return byte_buffer.getInt();	1✔
219	}
220
221	public long readULong(int n) throws IOException {
222	byte[] raw_bytes = readBytes(n);	1✔
223	if (LSF == null) {	1✔
UNCOV 224	throw new IOException("Byte order not determined for reading numeric values.");	×
225	}
226
227	if (n != 2 && n != 4 && n != 6 && n != 8) {	1✔
UNCOV 228	throw new IOException("Unsupported number of bytes in an integer: " + n);	×
229	}
230	long ret = 0;	1✔
231	short unsigned_byte_value;
232
233	for (int i = 0; i < n; i++) {	1✔
234	if (LSF) {	1✔
235	unsigned_byte_value = raw_bytes[i];	1✔
236	} else {
UNCOV 237	unsigned_byte_value = raw_bytes[n - i - 1];	×
238	}
239
240	if (unsigned_byte_value < 0) {	1✔
241	unsigned_byte_value += 256;	1✔
242	}
243
244	ret += unsigned_byte_value * (1L << (8 * i));	1✔
245	}
246	if(ret < 0){	1✔
247	throw new IOException("Sorry for hoping this wouldn't be used with values over 2^63-1");	1✔
248	}
249	return ret;	1✔
250	}
251
252	// Floating point reader methods:
253	public double readDouble() throws IOException {
254	ByteBuffer byte_buffer = ByteBuffer.wrap(readBytes(8));	1✔
255	checkLSF(byte_buffer);	1✔
256	return byte_buffer.getDouble();	1✔
257	}
258
259	public float readFloat() throws IOException {
260	ByteBuffer byte_buffer = ByteBuffer.wrap(readBytes(4));	1✔
261	checkLSF(byte_buffer);	1✔
262	return byte_buffer.getFloat();	1✔
263	}
264
265
266	/*
267	* Method for reading character strings:
268	*
269	* readString() reads NULL-terminated strings; i.e. it chops the
270	* string at the first zero encountered.
271	* we probably need an alternative, readRawString(), that reads
272	* a String as is.
273	*/
274	public String readString(int n) throws IOException {
275
276	String ret = new String(readBytes(n), "US-ASCII");	1✔
277
278	// Remove the terminating and/or padding zero bytes:
279	if (ret != null && ret.indexOf(0) > -1) {	1✔
280	return ret.substring(0, ret.indexOf(0));	1✔
281	}
UNCOV 282	return ret;	×
283	}
284
285	/*
286	* Same, but expecting potential Unicode characters.
287	*/
288	public String readUtfString(int n) throws IOException {
289
290	String ret = new String(readBytes(n), "UTF8");	1✔
291
292	// Remove the terminating and/or padding zero bytes:
293	if (ret.indexOf(0) > -1) {	1✔
294	return ret.substring(0, ret.indexOf(0));	1✔
295	}
UNCOV 296	return ret;	×
297	}
298
299	/*
300	* More complex helper methods for reading NewDTA "sections" ...
301	*/
302	public byte[] readPrimitiveSection(String tag) throws IOException {
303	readOpeningTag(tag);	1✔
304	byte[] ret = readPrimitiveSectionBytes();	1✔
305	readClosingTag(tag);	1✔
306	return ret;	1✔
307	}
308
309	public byte[] readPrimitiveSection(String tag, int length) throws IOException {
310	readOpeningTag(tag);	1✔
311	byte[] ret = readBytes(length);	1✔
312	readClosingTag(tag);	1✔
313	return ret;	1✔
314	}
315
316	public String readPrimitiveStringSection(String tag) throws IOException {
317	return new String(readPrimitiveSection(tag), "US-ASCII");	1✔
318	}
319
320	public String readPrimitiveStringSection(String tag, int length) throws IOException {
321	return new String(readPrimitiveSection(tag, length), "US-ASCII");	1✔
322	}
323
324	public String readLabelSection(String tag, int limit) throws IOException {
325	readOpeningTag(tag);	1✔
326	/**
327	* ll The byte length of the UTF-8 characters, whose length is
328	* recorded in a 2-byte unsigned integer encoded according to
329	* byteorder.
330	*/
331	int lengthOfLabel = readUShort();	1✔
332	logger.fine("length of label: " + lengthOfLabel);	1✔
333	String label = null;	1✔
334	if (lengthOfLabel > 0) {	1✔
UNCOV 335	label = new String(readBytes(lengthOfLabel), "US-ASCII");	×
336	}
337	logger.fine("ret: " + label);	1✔
338	readClosingTag(tag);	1✔
339	return label;	1✔
340	}
341
342	/*
343	* This method reads a string section the length of which is defined.
344	* the format of the section is as follows:
345	* <tag>Lxxxxxx...x</tag>
346	* where L is a single byte specifying the length of the enclosed
347	* string; followed by L bytes.
348	* L must be within
349	* 0 <= L <= limit
350	* (for example, the "dataset label" is limited to 80 characters).
351	*/
352	public String readDefinedStringSection(String tag, int limit) throws IOException {
353	readOpeningTag(tag);	1✔
354	short number = readUByte();	1✔
355	logger.fine("number: " + number);	1✔
356	if (number < 0 \|\| number > limit) {	1✔
UNCOV 357	throw new IOException("<more than limit characters in the section \"tag\">");	×
358	}
359	String ret = null;	1✔
360	if (number > 0) {	1✔
361	ret = new String(readBytes(number), "US-ASCII");	1✔
362	}
363	logger.fine("ret: " + ret);	1✔
364	readClosingTag(tag);	1✔
365	return ret;	1✔
366	}
367
368	public long readIntegerSection(String tag, int n) throws IOException {
369	readOpeningTag(tag);	1✔
370	long number = readULong(n);	1✔
371	readClosingTag(tag);	1✔
372	return number;	1✔
373	}
374
375	// This helper method is used for skipping the <ch>llll...</ch> sections
376	// inside the "<charachteristics>" section; where llll is a 4-byte unsigned
377	// int followed by llll bytes.
378	public void skipDefinedSections(String tag) throws IOException {
379	logger.fine("entering at offset " + buffer_byte_offset);	1✔
380	while (checkTag("<" + tag + ">")) {	1✔
381	logger.fine("tag " + tag + " encountered at offset " + buffer_byte_offset);	1✔
382	readOpeningTag(tag);	1✔
383	long number = readULong(4);	1✔
384	logger.fine(number + " bytes in this section;");	1✔
385	if (number < 0) {	1✔
UNCOV 386	throw new IOException("<negative number of bytes in skipDefinedSection(\"tag\")?>");	×
387	}
388	byte[] skipped_bytes = readBytes((int) number);	1✔
389	readClosingTag(tag);	1✔
390	logger.fine("read closing tag </" + tag + ">;");	1✔
391
392	}	1✔
393	logger.fine("exiting at offset " + buffer_byte_offset);	1✔
394	}	1✔
395
396	public boolean checkTag(String tag) throws IOException {
397	if (tag == null \|\| tag.equals("")) {	1✔
UNCOV 398	throw new IOException("opening tag must be a non-empty string.");	×
399	}
400
401	int n = tag.length();	1✔
402	if ((this.buffer_size - buffer_byte_offset) >= n) {	1✔
403	return (tag).equals(new String(Arrays.copyOfRange(buffer, buffer_byte_offset, buffer_byte_offset+n),"US-ASCII"));	1✔
404	}
405	else{
UNCOV 406	bufferMoreBytes();	×
407	return checkTag(tag);	×
408	}
409
410	}
411
412	public void readOpeningTag(String tag) throws IOException {
413	if (tag == null \|\| tag.equals("")) {	1✔
UNCOV 414	throw new IOException("opening tag must be a non-empty string.");	×
415	}
416
417	String openTagString = new String(readBytes(tag.length() + 2), "US-ASCII");	1✔
418	if (openTagString == null \|\| !openTagString.equals("<"+tag+">")) {	1✔
UNCOV 419	throw new IOException("Could not read opening tag <"+tag+">");	×
420	}
421	}	1✔
422
423	public void readClosingTag(String tag) throws IOException {
424	if (tag == null \|\| tag.equals("")) {	1✔
UNCOV 425	throw new IOException("closing tag must be a non-empty string.");	×
426	}
427
428	String closeTagString = new String(readBytes(tag.length() + 3), "US-ASCII");	1✔
429	logger.fine("closeTagString: " + closeTagString);	1✔
430
431	if (closeTagString == null \|\| !closeTagString.equals("</" + tag + ">")) {	1✔
UNCOV 432	StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();	×
433	String msg = "";	×
434	for (int i = 0; i < 10; i++) {	×
435	StackTraceElement stackTraceElement = stackTrace[i];	×
436	msg += stackTraceElement.toString() + "\n";	×
437	}
UNCOV 438	throw new IOException("Could not read closing tag </" + tag + ">: " + msg);	×
439	}
440	}	1✔
441
442	private byte[] readPrimitiveSectionBytes() throws IOException {
443	byte[] cached_bytes = null;	1✔
444
445	if (buffer_byte_offset > this.buffer_size) {	1✔
UNCOV 446	throw new IOException("Buffer overflow in DataReader.");	×
447	}
448	if (buffer_byte_offset == this.buffer_size) {	1✔
449	// buffer empty;
UNCOV 450	bufferMoreBytes();	×
451	}
452
453	int cached_offset = buffer_byte_offset;	1✔
454
455	while (buffer[buffer_byte_offset] != '<') {	1✔
456	buffer_byte_offset++;	1✔
457
458	if (buffer_byte_offset == this.buffer_size) {	1✔
UNCOV 459	logger.fine("reached the end of buffer in readPrimitiveSectionBytes; offset " + buffer_byte_offset);	×
460	cached_bytes = mergeCachedBytes(cached_bytes, cached_offset);	×
461	bufferMoreBytes();	×
462	cached_offset = 0;	×
463	}
464	}
465
466	return mergeCachedBytes(cached_bytes, cached_offset);	1✔
467	}
468
469	private byte[] mergeCachedBytes(byte[] cached_bytes, int cached_offset) throws IOException {
470
471	byte[] ret_bytes;
472	if (cached_bytes == null) {	1✔
473	if (buffer_byte_offset - cached_offset < 0) {	1✔
UNCOV 474	throw new IOException("Error merging internal read buffer (no bytes cached to merge)");	×
475	}
476	// empty section - as in <section></section>
477	if (buffer_byte_offset - cached_offset == 0) {	1✔
478	return null;	1✔
479	}
480
481	ret_bytes = new byte[buffer_byte_offset - cached_offset];	1✔
482	System.arraycopy(buffer, cached_offset, ret_bytes, 0, buffer_byte_offset - cached_offset);	1✔
483	} else {
UNCOV 484	if (cached_offset != 0) {	×
485	throw new IOException("Error merging internal read buffer (non-zero cached offset)");	×
486	}
UNCOV 487	ret_bytes = new byte[cached_bytes.length + buffer_byte_offset];	×
488	System.arraycopy(cached_bytes, 0, ret_bytes, 0, cached_bytes.length);	×
489	if (buffer_byte_offset > 0) {	×
490	System.arraycopy(buffer, 0, ret_bytes, cached_bytes.length, buffer_byte_offset);	×
491	}
492	}
493	return ret_bytes;	1✔
494	}
495
496	}

IQSS / dataverse / #22987

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous