• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #22002

01 Apr 2024 07:56PM CUT coverage: 20.716% (+0.5%) from 20.173%
#22002

push

github

web-flow
Merge pull request #10453 from IQSS/develop

Merge 6.2 into master

704 of 2679 new or added lines in 152 files covered. (26.28%)

81 existing lines in 49 files now uncovered.

17160 of 82836 relevant lines covered (20.72%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.5
/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java
1
package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta;
2

3
import java.io.BufferedInputStream;
4
import java.io.File;
5
import java.io.FileInputStream;
6
import java.io.FileOutputStream;
7
import java.io.IOException;
8
import java.io.OutputStreamWriter;
9
import java.io.PrintWriter;
10
import java.text.DecimalFormat;
11
import java.text.NumberFormat;
12
import java.text.ParseException;
13
import java.text.SimpleDateFormat;
14
import java.util.ArrayList;
15
import java.util.Arrays;
16
import java.util.Calendar;
17
import java.util.Date;
18
import java.util.GregorianCalendar;
19
import java.util.HashMap;
20
import java.util.HashSet;
21
import java.util.LinkedHashMap;
22
import java.util.List;
23
import java.util.Map;
24
import java.util.Scanner;
25
import java.util.Set;
26
import java.util.TimeZone;
27
import java.util.logging.Logger;
28

29
import edu.harvard.iq.dataverse.DataTable;
30
import edu.harvard.iq.dataverse.datavariable.DataVariable;
31
import edu.harvard.iq.dataverse.datavariable.VariableCategory;
32

33
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader;
34
import edu.harvard.iq.dataverse.ingest.tabulardata.spi.TabularDataFileReaderSpi;
35
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest;
36
import org.apache.commons.lang3.StringUtils;
37

38
/**
39
 * ingest plugin for Stata 13-15 (117-119) DTA file format. A copy and paste from
40
 *
41
 * - v.13/("dta 117"): https://www.stata.com/help.cgi?dta_117
42
 *
43
 * - v.14/("dta 118"): https://www.stata.com/help.cgi?dta
44
 *
45
 * - v.15/("dta 119"): https://www.stata.com/help.cgi?dta_119
46
 *
47
 */
48
public class NewDTAFileReader extends TabularDataFileReader {
49
    //@Inject
50
    //VariableServiceBean varService;
51
    // static fields, STATA-specific constants, etc. 
52

53
    // SECTION TAGS:
54
    // 
55
    // The new STATA format features XML-like section tags - 
56
    // <stata_dta><header>...</header>...</stata_dta>
57
    
58
    // MAIN, TOP-LEVEL FILE SECTION:
59
    private static final String TAG_DTA = "stata_dta";
60

61
    // HEADER SECTION: 
62
    private static final String TAG_HEADER = "header";
63
    private static final String TAG_HEADER_FILEFORMATID = "release";
64
    private static final String TAG_HEADER_BYTEORDER = "byteorder";
65
    private static final String TAG_HEADER_VARNUMBER = "K";
66
    private static final String TAG_HEADER_OBSNUMBER = "N";
67
    private static final String TAG_HEADER_FILELABEL = "label";
68
    private static final String TAG_HEADER_TIMESTAMP = "timestamp";
69

70
    // MAP SECTION: 
71
    private static final String TAG_MAP = "map";
72

73
    // VARIABLE TYPES SECTION: 
74
    private static final String TAG_VARIABLE_TYPES = "variable_types";
75

76
    // VARIABLE NAMES SECTION: 
77
    private static final String TAG_VARIABLE_NAMES = "varnames";
78

79
    // VARIABLE SORT ORDER SECTION: 
80
    private static final String TAG_SORT_ORDER = "sortlist";
81

82
    // VARIABLE DISPLAY FORMATS: 
83
    private static final String TAG_DISPLAY_FORMATS = "formats";
84

85
    // VALUE LABEL FORMAT NAMES: 
86
    private static final String TAG_VALUE_LABEL_FORMAT_NAMES = "value_label_names";
87

88
    // VARIABLE LABELS: 
89
    private static final String TAG_VARIABLE_LABELS = "variable_labels";
90

91
    // "CHARACTERISTICS":
92
    private static final String TAG_CHARACTERISTICS = "characteristics";
93
    private static final String TAG_CHARACTERISTICS_SUBSECTION = "ch";
94

95
    // DATA SECTION!
96
    private static final String TAG_DATA = "data";
97

98
    // STRLs SECTION: 
99
    private static final String TAG_STRLS = "strls";
100
    private static final String STRL_GSO_HEAD = "GSO";
101

102
    // VALUE LABELS SECTION:
103
    private static final String TAG_VALUE_LABELS = "value_labels";
104
    private static final String TAG_VALUE_LABELS_LBL_DEF = "lbl";
105

106
    private static Map<Integer, String> STATA_RELEASE_NUMBER =
1✔
107
            new HashMap<Integer, String>();
108

109
    private static Map<Integer, Map<String, Integer>> CONSTANT_TABLE =
1✔
110
            new LinkedHashMap<Integer, Map<String, Integer>>();
111

112
    private static Map<String, Integer> releaseconstant
1✔
113
            = new LinkedHashMap<String, Integer>();
114

115
    private static Map<String, Integer> byteLengthTable =
1✔
116
            new HashMap<String, Integer>();
117

118
    private static Map<Integer, String> variableTypeTable =
1✔
119
            new LinkedHashMap<Integer, String>();
120

121
    private static final int[] LENGTH_HEADER = {60, 109};
1✔
122
    private static final int[] LENGTH_LABEL = {32, 81};
1✔
123
    private static final int[] LENGTH_NAME = {9, 33};
1✔
124
    private static final int[] LENGTH_FORMAT_FIELD = {7, 12, 49};
1✔
125
    private static final int[] LENGTH_EXPANSION_FIELD = {0, 2, 4};
1✔
126
    private static final int[] DBL_MV_PWR = {333, 1023};
1✔
127

128
    private static final int DTA_MAGIC_NUMBER_LENGTH = 4;
129
    private static final int NVAR_FIELD_LENGTH       = 2;
130
    private static final int NOBS_FIELD_LENGTH       = 4;
131
    private static final int TIME_STAMP_LENGTH      = 18;
132
    private static final int VAR_SORT_FIELD_LENGTH   = 2;
133
    private static final int VALUE_LABEL_HEADER_PADDING_LENGTH = 3;
134

135
    private static int MISSING_VALUE_BIAS = 26;
1✔
136

137
    private byte BYTE_MISSING_VALUE = Byte.MAX_VALUE;
1✔
138
    private short INT_MISSIG_VALUE = Short.MAX_VALUE;
1✔
139
    private int LONG_MISSING_VALUE = Integer.MAX_VALUE;
1✔
140

141
    // Static initialization:
142
    static {
143
        releaseconstant.put("HEADER",     LENGTH_HEADER[1]);
1✔
144
        releaseconstant.put("LABEL",     LENGTH_LABEL[1]);
1✔
145
        releaseconstant.put("NAME",      LENGTH_NAME[1]);
1✔
146
        releaseconstant.put("FORMAT",    LENGTH_FORMAT_FIELD[1]);
1✔
147
        releaseconstant.put("EXPANSION", LENGTH_EXPANSION_FIELD[2]);
1✔
148
        releaseconstant.put("DBL_MV_PWR", DBL_MV_PWR[1]);
1✔
149
        
150
        // 1, 2 and 4-byte integers: 
151
        byteLengthTable.put("Byte",1);
1✔
152
        byteLengthTable.put("Integer",2);
1✔
153
        byteLengthTable.put("Long",4);
1✔
154
        // 4 and 8-byte floats: 
155
        byteLengthTable.put("Float",4);
1✔
156
        byteLengthTable.put("Double",8);
1✔
157
        // STRLs are defined in their own section, outside of the 
158
        // main data. In the <data> section they are referenced 
159
        // by 2 x 4 byte values, "(v,o)", 8 bytes total.
160
        byteLengthTable.put("STRL",8);
1✔
161

162
        variableTypeTable.put(65530,"Byte");
1✔
163
        variableTypeTable.put(65529,"Integer");
1✔
164
        variableTypeTable.put(65528,"Long");
1✔
165
        variableTypeTable.put(65527,"Float");
1✔
166
        variableTypeTable.put(65526,"Double");
1✔
167
    }
168
    
169
    private static String unfVersionNumber = "6";
1✔
170

171
    private static final List<Float> FLOAT_MISSING_VALUES = Arrays.asList(
1✔
172
            0x1.000p127f, 0x1.001p127f, 0x1.002p127f, 0x1.003p127f,
1✔
173
            0x1.004p127f, 0x1.005p127f, 0x1.006p127f, 0x1.007p127f,
1✔
174
            0x1.008p127f, 0x1.009p127f, 0x1.00ap127f, 0x1.00bp127f,
1✔
175
            0x1.00cp127f, 0x1.00dp127f, 0x1.00ep127f, 0x1.00fp127f,
1✔
176
            0x1.010p127f, 0x1.011p127f, 0x1.012p127f, 0x1.013p127f,
1✔
177
            0x1.014p127f, 0x1.015p127f, 0x1.016p127f, 0x1.017p127f,
1✔
178
            0x1.018p127f, 0x1.019p127f, 0x1.01ap127f);
1✔
179

180
    private Set<Float> FLOAT_MISSING_VALUE_SET =
1✔
181
            new HashSet<>(FLOAT_MISSING_VALUES);
182

183
    private static final List<Double> DOUBLE_MISSING_VALUE_LIST = Arrays.asList(
1✔
184
            0x1.000p1023, 0x1.001p1023, 0x1.002p1023, 0x1.003p1023, 0x1.004p1023,
1✔
185
            0x1.005p1023, 0x1.006p1023, 0x1.007p1023, 0x1.008p1023, 0x1.009p1023,
1✔
186
            0x1.00ap1023, 0x1.00bp1023, 0x1.00cp1023, 0x1.00dp1023, 0x1.00ep1023,
1✔
187
            0x1.00fp1023, 0x1.010p1023, 0x1.011p1023, 0x1.012p1023, 0x1.013p1023,
1✔
188
            0x1.014p1023, 0x1.015p1023, 0x1.016p1023, 0x1.017p1023, 0x1.018p1023,
1✔
189
            0x1.019p1023, 0x1.01ap1023);
1✔
190

191
    private Set<Double> DOUBLE_MISSING_VALUE_SET =
1✔
192
            new HashSet<>(DOUBLE_MISSING_VALUE_LIST);
193

194
    private static SimpleDateFormat sdf_ymdhmsS = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); // sdf
1✔
195

196
    private static SimpleDateFormat sdf_ymd = new SimpleDateFormat("yyyy-MM-dd"); // sdf2
1✔
197

198
    private static SimpleDateFormat sdf_hms = new SimpleDateFormat("HH:mm:ss"); // stf
1✔
199

200
    private static SimpleDateFormat sdf_yw = new SimpleDateFormat("yyyy-'W'ww");
1✔
201

202
    // stata's calendar
203
    private static Calendar GCO_STATA = new GregorianCalendar(TimeZone.getTimeZone("GMT"));
1✔
204

205
    private static String[] DATE_TIME_FORMAT = {
1✔
206
        "%tc", "%td", "%tw", "%tq", "%tm", "%th", "%ty",
207
        "%d", "%w", "%q", "%m", "h", "%tb"
208
    };
209
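    // NOTE(review): the "h" entry in DATE_TIME_FORMAT above has no leading "%",
    // unlike every other entry; it looks like a typo for "%h" - confirm.
    // New "business calendar format" has been added in Stata 12. -- L.A.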
210
    private static String[] DATE_TIME_CATEGORY = {
1✔
211
        "time", "date", "date", "date", "date", "date", "date",
212
        "date", "date", "date", "date", "date", "date"
213
    };
214
    private static Map<String, String> DATE_TIME_FORMAT_TABLE = new LinkedHashMap<String, String>();
1✔
215

216
    private static long MILLISECCONDS_PER_DAY = 24 * 60 * 60 * 1000L;
1✔
217

218
    private static long STATA_BIAS_TO_EPOCH;
219

220
    static {
221

222
        sdf_ymdhmsS.setTimeZone(TimeZone.getTimeZone("GMT"));
1✔
223
        sdf_ymd.setTimeZone(TimeZone.getTimeZone("GMT"));
1✔
224
        sdf_hms.setTimeZone(TimeZone.getTimeZone("GMT"));
1✔
225
        sdf_yw.setTimeZone(TimeZone.getTimeZone("GMT"));
1✔
226

227
        // set stata's calendar
228
        GCO_STATA.set(1, 1960);// year
1✔
229
        GCO_STATA.set(2, 0); // month
1✔
230
        GCO_STATA.set(5, 1);// day of month
1✔
231
        GCO_STATA.set(9, 0);// AM(0) or PM(1)
1✔
232
        GCO_STATA.set(10, 0);// hh
1✔
233
        GCO_STATA.set(12, 0);// mm
1✔
234
        GCO_STATA.set(13, 0);// ss
1✔
235
        GCO_STATA.set(14, 0); // SS millisecond
1✔
236

237
        STATA_BIAS_TO_EPOCH  = GCO_STATA.getTimeInMillis(); // =  -315619200000
1✔
238
        
239
        for (int i=0; i<DATE_TIME_FORMAT.length; i++){
1✔
240
            DATE_TIME_FORMAT_TABLE.put(DATE_TIME_FORMAT[i],DATE_TIME_CATEGORY[i]);
1✔
241
        }
242
    }
243

244
    // instance fields //
245
    // Named after this class's package. The previous expression went through the
    // sibling DTAFileReader class, which lives in the same package, so the logger
    // name is unchanged.
    private static Logger logger = Logger.getLogger(NewDTAFileReader.class.getPackage().getName());
1✔
246

247
    private DataTable dataTable = new DataTable();
1✔
248

249
    private DTADataMap dtaMap = null;
1✔
250

251
    // Stata has a mechanism for sharing defined category labels between
252
    // multiple variables. A variable may have an explicitly defined (and named)
253
    // table of category labels; and another variable, instead of defining its 
254
    // own, may be referencing it by name. 
255
    // The following lookup table is for maintaining this reference map, 
256
    // between variables and named value tables. It is populated from a 
257
    // fixed-width section early on in the file. 
258
    private String[] valueLabelsLookupTable = null;
1✔
259

260
    private Map<String, Integer> constantTable;
261
    
262
    private Map<String, String> cachedGSOs;
263

264
    private NumberFormat twoDigitFormatter = new DecimalFormat("00");
1✔
265

266
    private NumberFormat doubleNumberFormatter = new DecimalFormat();
1✔
267

268
    TabularDataIngest ingesteddata = new TabularDataIngest();
1✔
269

270
    private int DTAVersion;
271

272
    private int headerLength;
273

274
    private int dataLabelLength;
275

276
    private boolean hasSTRLs = false;
1✔
277

278
    /* variableTypes is a list of string values representing the type of 
279
     * data values *stored* in the file - "byte", "integer", "float", "string", 
280
     * etc. We need this information as we're reading the data, to know how
281
     * many bytes to read for every object type and how to convert the binary
282
     * data into the proper Java type.
283
     * It's important to note that these types are *Stata* types - the types
284
     * of the variables on the DVN side may change (see below).
285
     * The variableTypesFinal will describe the data values once they have 
286
     * been read and stored in the tab. file. This is an important distinction: 
287
     * for example, the time/data values are stored as binary numeric values 
288
     * in Stata files, but we'll be storing them as strings in the DVN tabular
289
     * files.
290
     */
291

292
    private String[] variableTypes=null;
1✔
293

294
    private String[] dateVariableFormats=null; 
1✔
295
          
296
    private static final String MissingValueForTabDelimitedFile = "";
297

298
    private String[] MIME_TYPE = {
1✔
299
        "application/x-stata",
300
        "application/x-stata-13",
301
        "application/x-stata-14",
302
        "application/x-stata-15"
303
    };
304
         
305
    // Constructor -----------------------------------------------------------//
306
    public NewDTAFileReader(TabularDataFileReaderSpi originator, int DTAVersion) {
307
        super(originator);
1✔
308
        
309
        this.DTAVersion = DTAVersion;
1✔
310
        STATA_RELEASE_NUMBER.put(DTAVersion, "v." + (DTAVersion-104));
1✔
311

312
        CONSTANT_TABLE.put(DTAVersion, releaseconstant);
1✔
313
    }
1✔
314

315

316
    /*
317
     * This method configures Stata's release-specific parameters:
318
     */
319
    private void init() throws IOException {
320
        //
321
        logger.fine("release number=" + DTAVersion);
1✔
322

323
        BYTE_MISSING_VALUE -= MISSING_VALUE_BIAS;
1✔
324
        INT_MISSIG_VALUE -= MISSING_VALUE_BIAS;
1✔
325
        LONG_MISSING_VALUE -= MISSING_VALUE_BIAS;
1✔
326

327
        constantTable = CONSTANT_TABLE.get(DTAVersion);
1✔
328

329
        headerLength = constantTable.get("HEADER") - DTA_MAGIC_NUMBER_LENGTH;
1✔
330

331
        dataLabelLength = headerLength - (NVAR_FIELD_LENGTH
1✔
332
                + NOBS_FIELD_LENGTH + TIME_STAMP_LENGTH);
333
        logger.fine("data_label_length=" + dataLabelLength);
1✔
334

335
        logger.fine("constant table to be used:\n" + constantTable);
1✔
336

337
        doubleNumberFormatter.setGroupingUsed(false);
1✔
338
        doubleNumberFormatter.setMaximumFractionDigits(340);
1✔
339
    }
1✔
340

341
    @Override
342
    public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException {
343
        logger.fine("NewDTAFileReader: read() start");
1✔
344

345
        // shit ton of diagnostics (still) needed here!!  -- L.A.
346
        if (dataFile != null) {
1✔
347
            throw new IOException("this plugin does not support external raw data files");
1✔
348
        }
349

350
        DataReader dataReader;
351

352
        init();
1✔
353
        dataReader = new DataReader(stream);
1✔
354
        dataReader.readOpeningTag(TAG_DTA);
1✔
355
        readHeader(dataReader);
1✔
356
        readMap(dataReader);
1✔
357
        readVariableTypes(dataReader);
1✔
358
        readVariableNames(dataReader);
1✔
359
        readSortOrder(dataReader);
1✔
360
        readDisplayFormats(dataReader);
1✔
361
        readValueLabelFormatNames(dataReader);
1✔
362
        readVariableLabels(dataReader);
1✔
363
        // "characteristics" - STATA-proprietary information
364
        // (we are skipping it)
365
        readCharacteristics(dataReader);
1✔
366
        
367
        String variableHeaderLine = null; 
1✔
368
        
369
        if (storeWithVariableHeader) {
1✔
NEW
370
            variableHeaderLine = generateVariableHeader(dataTable.getDataVariables());
×
371
        }
372
        readData(dataReader, variableHeaderLine);
1✔
373

374
        // (potentially) large, (potentially) non-ASCII character strings
375
        // saved outside the <data> section, and referenced 
376
        // in the data with (v,o) notation - docs have more info
377
        readSTRLs(dataReader);
1✔
378
        readValueLabels(dataReader);
1✔
379
        dataReader.readClosingTag(TAG_DTA);
1✔
380

381
        ingesteddata.setDataTable(dataTable);
1✔
382

383
        logger.fine("NewDTAFileReader: read() end.");
1✔
384
        return ingesteddata;
1✔
385
    }
386

387
    private void readHeader(DataReader dataReader) throws IOException {
388
        logger.fine("readHeader(): start");
1✔
389

390
        if (dataReader == null) {
1✔
391
            throw new IllegalArgumentException("stream == null!");
×
392
        }
393

394
        logger.fine("reading the version header.");
1✔
395

396
        dataReader.readOpeningTag(TAG_HEADER);
1✔
397
        String dtaVersionTag = dataReader.readPrimitiveStringSection(TAG_HEADER_FILEFORMATID, 3);
1✔
398

399
        if (!("117".equals(dtaVersionTag)||"118".equals(dtaVersionTag)||"119".equals(dtaVersionTag))) {
1✔
400
            throw new IOException("Unexpected version tag found: " + dtaVersionTag + "; expected value: 117-119.");
×
401
        }
402

403
        String byteOrderTag = dataReader.readPrimitiveStringSection(TAG_HEADER_BYTEORDER);
1✔
404

405
        logger.fine("byte order: "+byteOrderTag);
1✔
406

407
        dataReader.setLSF("LSF".equals(byteOrderTag));
1✔
408

409
        long varNumber = dataReader.readIntegerSection(TAG_HEADER_VARNUMBER, DTAVersion == 119? 4: 2);
1✔
410
        logger.fine("number of variables: " + varNumber);
1✔
411

412
        /**
413
         * 5.1.4 N, # of observations
414
         *
415
         * N, the number of observations stored in the dataset, is recorded as
416
         * a 4 or 8-byte unsigned integer field recorded according to byteorder.
417
         */
418
        long obsNumber = dataReader.readIntegerSection(TAG_HEADER_OBSNUMBER, DTAVersion == 117? 4: 8);
1✔
419
        logger.fine("number of observations: " + obsNumber);
1✔
420

421
        dataTable.setVarQuantity(varNumber);
1✔
422
        dataTable.setCaseQuantity(obsNumber);
1✔
423

424
        dataTable.setOriginalFileFormat(MIME_TYPE[0]);
1✔
425
        
426
        dataTable.setOriginalFormatVersion("STATA " + (DTAVersion-104));
1✔
427
        dataTable.setUnf("UNF:pending");
1✔
428

429
        // The word "dataset" below is used in its STATA parlance meaning, 
430
        // i.e., this is a label that describes the datafile.
431
        String datasetLabel;
432
        if (DTAVersion==117){
1✔
433
            datasetLabel = dataReader.readDefinedStringSection(TAG_HEADER_FILELABEL, 80);
1✔
434
        }else{
435
            datasetLabel = dataReader.readLabelSection(TAG_HEADER_FILELABEL, 320);
1✔
436
        }
437
        logger.fine("Stata \"dataset\" label: " + datasetLabel);
1✔
438

439
        // TODO: 
440
        // We are not doing anything with this label. But maybe we should?
441
        // We could add a "description" field to the Dataverse DataTable object, 
442
        // and maybe put it there. Alternatively we could add some other mechanism for 
443
        // the ingest plugin to pass this label back to Dataverse, and maybe 
444
        // appending it to the DataFile description in the FileMetadata object. 
445
        // Probably not the highest priority. 
446
        String datasetTimeStamp = dataReader.readDefinedStringSection(TAG_HEADER_TIMESTAMP, 17);
1✔
447
        logger.fine("dataset time stamp: " + datasetTimeStamp);
1✔
448

449
        if (datasetTimeStamp == null
1✔
450
                || (datasetTimeStamp.length() > 0 && datasetTimeStamp.length() < 17)) {
1✔
451
            throw new IOException("unexpected/invalid length of the time stamp in the NewDTA header.");
×
452
        } else {
453
            // If we decide that we actually want/need to use this time stamp for any 
454
            // practical purposes (again, we could add it to the descriptive 
455
            // metadata somehow), we should probably validate it against dd Mon yyyy hh:mm.
456
        }
457

458
        dataReader.readClosingTag("header");
1✔
459
        logger.fine("readHeader(): end");
1✔
460
    }
1✔
461

462
    private void readMap(DataReader reader) throws IOException {
463
        logger.fine("Map section; at offset " + reader.getByteOffset());
1✔
464
        reader.readOpeningTag(TAG_MAP);
1✔
465

466
        dtaMap = new DTADataMap();
1✔
467

468
        long dta_offset_stata_data = reader.readULong();
1✔
469
        logger.fine("dta_offset_stata_data: " + dta_offset_stata_data);
1✔
470
        dtaMap.setOffset_head(dta_offset_stata_data);
1✔
471
        long dta_offset_map = reader.readULong();
1✔
472
        logger.fine("dta_offset_map: " + dta_offset_map);
1✔
473
        dtaMap.setOffset_map(dta_offset_map);
1✔
474
        long dta_offset_variable_types = reader.readULong();
1✔
475
        logger.fine("dta_offset_variable_types: " + dta_offset_variable_types);
1✔
476
        dtaMap.setOffset_types(dta_offset_variable_types);
1✔
477
        long dta_offset_varnames = reader.readULong();
1✔
478
        logger.fine("dta_offset_varnames: " + dta_offset_varnames);
1✔
479
        dtaMap.setOffset_varnames(dta_offset_varnames);
1✔
480
        long dta_offset_sortlist = reader.readULong();
1✔
481
        logger.fine("dta_offset_sortlist: " + dta_offset_sortlist);
1✔
482
        dtaMap.setOffset_srtlist(dta_offset_sortlist);
1✔
483
        long dta_offset_formats = reader.readULong();
1✔
484
        logger.fine("dta_offset_formats: " + dta_offset_formats);
1✔
485
        dtaMap.setOffset_fmts(dta_offset_formats);
1✔
486
        long dta_offset_value_label_names = reader.readULong();
1✔
487
        logger.fine("dta_offset_value_label_names: " + dta_offset_value_label_names);
1✔
488
        dtaMap.setOffset_vlblnames(dta_offset_value_label_names);
1✔
489
        long dta_offset_variable_labels = reader.readULong();
1✔
490
        logger.fine("dta_offset_variable_labels: " + dta_offset_variable_labels);
1✔
491
        dtaMap.setOffset_varlabs(dta_offset_variable_labels);
1✔
492
        long dta_offset_characteristics = reader.readULong();
1✔
493
        logger.fine("dta_offset_characteristics: " + dta_offset_characteristics);
1✔
494
        dtaMap.setOffset_characteristics(dta_offset_characteristics);
1✔
495
        long dta_offset_data = reader.readULong();
1✔
496
        logger.fine("dta_offset_data: " + dta_offset_data);
1✔
497
        dtaMap.setOffset_data(dta_offset_data);
1✔
498
        long dta_offset_strls = reader.readULong();
1✔
499
        logger.fine("dta_offset_strls: " + dta_offset_strls);
1✔
500
        dtaMap.setOffset_strls(dta_offset_strls);
1✔
501
        long dta_offset_value_labels = reader.readULong();
1✔
502
        logger.fine("dta_offset_value_labels: " + dta_offset_value_labels);
1✔
503
        dtaMap.setOffset_vallabs(dta_offset_value_labels);
1✔
504
        long dta_offset_data_close = reader.readULong();
1✔
505
        logger.fine("dta_offset_data_close: " + dta_offset_data_close);
1✔
506
        dtaMap.setOffset_data_close(dta_offset_data_close);
1✔
507
        long dta_offset_eof = reader.readULong();
1✔
508
        logger.fine("dta_offset_eof: " + dta_offset_eof);
1✔
509
        dtaMap.setOffset_eof(dta_offset_eof);
1✔
510

511
        reader.readClosingTag(TAG_MAP);
1✔
512

513
    }
1✔
514

515
    /* 
516
     * Variable type information is stored in the <variable_types>...</variable_types>
517
     * section, as number_of_variables * 2 byte values. 
518
     * Consult the Stata documentation for the type definition codes. 
519
     */
520
    private void readVariableTypes(DataReader reader) throws IOException {
521
        logger.fine("Type section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_types());
1✔
522
        reader.readOpeningTag(TAG_VARIABLE_TYPES);
1✔
523

524
        List<DataVariable> variableList = new ArrayList<>();
1✔
525
        // setup variableTypeList
526
        variableTypes = new String[dataTable.getVarQuantity().intValue()];
1✔
527

528
        for (int i = 0; i < dataTable.getVarQuantity(); i++) {
1✔
529
            int type = reader.readUShort();
1✔
530
            logger.fine("variable " + i + ": type=" + type);
1✔
531
            DataVariable dv = new DataVariable(i, dataTable);
1✔
532

533
            variableTypes[i] = configureVariableType(dv, type);
1✔
534

535
            variableList.add(dv);
1✔
536

537
        }
538

539
        reader.readClosingTag(TAG_VARIABLE_TYPES);
1✔
540
        dataTable.setDataVariables(variableList);
1✔
541

542
    }
1✔
543

544
    private String configureVariableType(DataVariable dv, int type) throws IOException {
545
        String typeLabel = null;
1✔
546

547
        if (variableTypeTable.containsKey(type)) {
1✔
548
            typeLabel = variableTypeTable.get(type);
1✔
549

550
            dv.setTypeNumeric();
1✔
551
            switch (typeLabel) {
1✔
552
                case "Byte":
553
                case "Integer":
554
                case "Long":
555
                    // these are treated as discrete:
556
                    dv.setIntervalDiscrete();
1✔
557
                    break;
1✔
558
                case "Float":
559
                case "Double":
560
                    // these are treated as contiuous:
561
                    dv.setIntervalContinuous();
1✔
562
                    break;
1✔
563
                default:
564
                    throw new IOException("Unrecognized type label: " + typeLabel + " for Stata type value (short) " + type + ".");
×
565
            }
1✔
566

567
        } else {
568
            // String:
569
            //
570
            // 32768 - flexible length STRL;
571
            // 1 ... 2045 - fixed-length STRF;
572

573
            if (type == 32768) {
1✔
574
                typeLabel = "STRL";
1✔
575
                hasSTRLs = true;
1✔
576

577
            } else if (type > 0 && type < 2046) {
1✔
578
                typeLabel = "STR" + type;
1✔
579
            } else {
580
                throw new IOException("unknown variable type value encountered: " + type);
×
581
            }
582

583
            dv.setTypeCharacter();
1✔
584
            dv.setIntervalDiscrete();
1✔
585
        }
586

587
        return typeLabel;
1✔
588

589
    }
590

591
    /* 
592
     * Variable Names are stored as number_of_variables * 33 byte long
593
     * (zero-padded and zero-terminated) character vectors. 
594
     */
595
    private void readVariableNames(DataReader reader) throws IOException {
596
        logger.fine("Variable names section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_varnames());
1✔
597
        reader.readOpeningTag(TAG_VARIABLE_NAMES);
1✔
598

599
        for (int i = 0; i < dataTable.getVarQuantity(); i++) {
1✔
600
            String variableName = reader.readString(DTAVersion == 117? 33: 129);
1✔
601
            logger.fine("variable " + i + ": name=" + variableName);
1✔
602
            if ((variableName != null) && (!variableName.equals(""))) {
1✔
603
                dataTable.getDataVariables().get(i).setName(variableName);
1✔
604
            } else {
605
                // TODO: Is this condition even possible? 
606
                // Should we be throwing an exception if it's encountered?
607
            }
608
        }
609

610
        reader.readClosingTag(TAG_VARIABLE_NAMES);
1✔
611
    }
1✔
612

613
    private void readSortOrder(DataReader reader) throws IOException {
614
        logger.fine("Sort Order section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_srtlist());
1✔
615
        reader.readOpeningTag(TAG_SORT_ORDER);
1✔
616

617
        for (int i = 0; i < dataTable.getVarQuantity(); i++) {
1✔
618
            long order = reader.readULong(DTAVersion == 119? 4: 2);
1✔
619
            logger.fine("variable " + i + ": sort order=" + order);
1✔
620
            // We don't use this variable sort order at all. 
621
        }
622

623
        // Important! 
624
        // The SORT ORDER section (5.5 in the doc) always contains
625
        // number_of_variables + 1 2 or 4 byte integers depending on version!
626
        long terminatingShort = reader.readULong(DTAVersion == 119? 4: 2);
1✔
627
        reader.readClosingTag(TAG_SORT_ORDER);
1✔
628
    }
1✔
629
    
630
    // Variable Formats are used exclusively for time and date variables. 
631
    private void readDisplayFormats(DataReader reader) throws IOException {
632
        logger.fine("Formats section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_fmts());
1✔
633
        reader.readOpeningTag(TAG_DISPLAY_FORMATS);
1✔
634
        dateVariableFormats = new String[dataTable.getVarQuantity().intValue()];
1✔
635

636
        for (int i = 0; i < dataTable.getVarQuantity(); i++) { 
1✔
637
            String variableFormat = reader.readString(DTAVersion == 117? 49: 57);
1✔
638
            logger.fine("variable " + i + ": displayFormat=" + variableFormat);
1✔
639
            
640
            String variableFormatKey;
641
            if (variableFormat.startsWith("%t")) {
1✔
642
                variableFormatKey = variableFormat.substring(0, 3);
1✔
643
            } else {
644
                variableFormatKey = variableFormat.substring(0, 2);
1✔
645
            }
646
            logger.fine(i + " th variableFormatKey=" + variableFormatKey);
1✔
647

648
            /* 
649
             * Now, let's check if this format is a known time or date format. 
650
             * If so, note that this changes the storage type of the variable!
651
             * i.e., times and dates are stored as binary numeric values, but on 
652
             * the DVN side/in the tab files they will become strings.
653
             */
654
            if (DATE_TIME_FORMAT_TABLE.containsKey(variableFormatKey)) {
1✔
655
                dateVariableFormats[i] = variableFormat;
1✔
656
                dataTable.getDataVariables().get(i).setFormatCategory(DATE_TIME_FORMAT_TABLE.get(variableFormatKey));
1✔
657
                logger.fine(i + "th var: category="
1✔
658
                        + DATE_TIME_FORMAT_TABLE.get(variableFormatKey));
1✔
659
                dataTable.getDataVariables().get(i).setTypeCharacter();
1✔
660
                dataTable.getDataVariables().get(i).setIntervalDiscrete();
1✔
661
            }
662
        }
663

664
        reader.readClosingTag(TAG_DISPLAY_FORMATS);
1✔
665
    }
1✔
666

667
    /*
668
     * Another fixed-field section
669
     */
670
    private void readValueLabelFormatNames(DataReader reader) throws IOException {
671
        logger.fine("Category valuable section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_vlblnames());
1✔
672
        reader.readOpeningTag(TAG_VALUE_LABEL_FORMAT_NAMES);
1✔
673

674
        valueLabelsLookupTable = new String[dataTable.getVarQuantity().intValue()];
1✔
675

676
        for (int i = 0; i < dataTable.getVarQuantity(); i++) {
1✔
677
            String valueLabelFormat = reader.readString(DTAVersion == 117? 33: 129);
1✔
678
            logger.fine("variable " + i + ": value label format=" + valueLabelFormat);
1✔
679
            if ((valueLabelFormat != null) && (!valueLabelFormat.equals(""))) {
1✔
680
                valueLabelsLookupTable[i] = valueLabelFormat;
1✔
681
            }
682
        }
683

684
        reader.readClosingTag(TAG_VALUE_LABEL_FORMAT_NAMES);
1✔
685

686
    }
1✔
687

688
    /* 
689
     * Another fixed-field section
690
     */
691
    private void readVariableLabels(DataReader reader) throws IOException {
692
        logger.fine("Variable labels section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_varlabs());
1✔
693
        reader.readOpeningTag(TAG_VARIABLE_LABELS);
1✔
694

695
        for (int i = 0; i < dataTable.getVarQuantity(); i++) {
1✔
696
            String variableLabel = reader.readUtfString(DTAVersion == 117? 81: 321);
1✔
697
            logger.fine("variable " + i + ": label=" + variableLabel);
1✔
698
            if ((variableLabel != null) && (!variableLabel.equals(""))) {
1✔
699
                dataTable.getDataVariables().get(i).setLabel(variableLabel);
1✔
700
            }
701
        }
702

703
        reader.readClosingTag(TAG_VARIABLE_LABELS);
1✔
704
    }
1✔
705

706
    private void readCharacteristics(DataReader reader) throws IOException {
707
        logger.fine("Characteristics section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_characteristics());
1✔
708
        reader.readOpeningTag(TAG_CHARACTERISTICS);
1✔
709

710
        reader.skipDefinedSections(TAG_CHARACTERISTICS_SUBSECTION);
1✔
711

712
        reader.readClosingTag(TAG_CHARACTERISTICS);
1✔
713

714
    }
1✔
715

716
    private void readData(DataReader reader, String variableHeaderLine) throws IOException {
717
        logger.fine("Data section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_data());
1✔
718
        logger.fine("readData(): start");
1✔
719
        reader.readOpeningTag(TAG_DATA);
1✔
720

721
        int nvar = dataTable.getVarQuantity().intValue();
1✔
722
        int nobs = dataTable.getCaseQuantity().intValue();
1✔
723

724
        int[] variableByteLengths = getVariableByteLengths(variableTypes);
1✔
725
        int bytes_per_row = calculateBytesPerRow(variableByteLengths);
1✔
726

727
        logger.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")");
1✔
728
        logger.fine("bytes per row=" + bytes_per_row + " bytes");
1✔
729
        logger.fine("variableTypes=" + Arrays.deepToString(variableTypes));
1✔
730

731
        // create a File object to save the tab-delimited data file
732
        File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");
1✔
733

734
        // save the temp tab-delimited file in the return ingest object:        
735
        ingesteddata.setTabDelimitedFile(tabDelimitedDataFile);
1✔
736

737
        FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile);
1✔
738
        PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);
1✔
739

740
        // add the variable header here, if needed
741
        if (variableHeaderLine != null) {
1✔
NEW
742
            pwout.println(variableHeaderLine); 
×
743
        }
744
        
745
        logger.fine("Beginning to read data stream.");
1✔
746

747
        for (int i = 0; i < nobs; i++) {
1✔
748
            Object[] dataRow = new Object[nvar];
1✔
749

750
            // TODO: 
751
            // maybe intercept any potential exceptions here, and add more 
752
            // diagnostic info, before re-throwing...
753
            int byte_offset = 0;
1✔
754
            for (int columnCounter = 0; columnCounter < nvar; columnCounter++) {
1✔
755

756
                String varType = variableTypes[columnCounter];
1✔
757

758
                // 4.0 Check if this is a time/date variable: 
759
                boolean isDateTimeDatum = false;
1✔
760
                String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory();
1✔
761
                if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) {
1✔
762
                    isDateTimeDatum = true;
1✔
763
                }
764

765
                String variableFormat = dateVariableFormats[columnCounter];
1✔
766

767
                if (varType == null || varType.equals("")) {
1✔
768
                    throw new IOException("Undefined variable type encountered in readData()");
×
769
                }
770

771
                if (varType.equals("Byte")) { // signed
1✔
772
                    byte byte_datum = reader.readByte();
1✔
773

774
                    logger.fine(i + "-th row " + columnCounter
1✔
775
                            + "=th column byte =" + byte_datum);
776
                    if (byte_datum >= BYTE_MISSING_VALUE) {
1✔
777
                        logger.fine(i + "-th row " + columnCounter
1✔
778
                                + "=th column byte MV=" + byte_datum);
779
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
1✔
780
                    } else {
781
                        dataRow[columnCounter] = byte_datum;
1✔
782
                        logger.fine(i + "-th row " + columnCounter
1✔
783
                                + "-th column byte value=" + byte_datum);
784
                    }
785

786
                    byte_offset++;
1✔
787
                } else if (varType.equals("Integer")) { // signed
1✔
788
                    short short_datum = (short) reader.readShort();
1✔
789

790
                    logger.fine(i + "-th row " + columnCounter
1✔
791
                            + "=th column stata int =" + short_datum);
792

793
                    if (short_datum >= INT_MISSIG_VALUE) {
1✔
794
                        logger.fine(i + "-th row " + columnCounter
1✔
795
                                + "=th column stata long missing value=" + short_datum);
796
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
1✔
797
                    } else {
798

799
                        if (isDateTimeDatum) {
1✔
800

801
                            DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum));
×
802
                            logger.fine(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
×
803
                            dataRow[columnCounter] = ddt.decodedDateTime;
×
804
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
×
805

806
                        } else {
×
807
                            dataRow[columnCounter] = short_datum;
1✔
808
                            logger.fine(i + "-th row " + columnCounter
1✔
809
                                    + "-th column \"integer\" value=" + short_datum);
810
                        }
811
                    }
812
                    byte_offset += 2;
1✔
813
                } else if (varType.equals("Long")) { // stata-Long = java's int: 4 byte
1✔
814
                    int int_datum = reader.readInt();
1✔
815

816
                    if (int_datum >= LONG_MISSING_VALUE) {
1✔
817
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
×
818
                    } else {
819
                        if (isDateTimeDatum) {
1✔
820
                            DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum));
×
821
                            logger.fine(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
×
822
                            dataRow[columnCounter] = ddt.decodedDateTime;
×
823
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
×
824

825
                        } else {
×
826
                            dataRow[columnCounter] = int_datum;
1✔
827
                            logger.fine(i + "-th row " + columnCounter
1✔
828
                                    + "-th column \"long\" value=" + int_datum);
829
                        }
830

831
                    }
832
                    byte_offset += 4;
1✔
833
                } else if (varType.equals("Float")) { // STATA float 4-byte
1✔
834

835
                    float float_datum = reader.readFloat();
1✔
836

837
                    logger.fine(i + "-th row " + columnCounter
1✔
838
                            + "=th column float =" + float_datum);
839
                    if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
1✔
840
                        logger.fine(i + "-th row " + columnCounter
×
841
                                + "=th column float missing value=" + float_datum);
842
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
×
843

844
                    } else {
845

846
                        if (isDateTimeDatum) {
1✔
847
                            DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum));
1✔
848
                            logger.fine(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
1✔
849
                            dataRow[columnCounter] = ddt.decodedDateTime;
1✔
850
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
1✔
851
                        } else {
1✔
852
                            dataRow[columnCounter] = float_datum;
1✔
853
                            logger.fine(i + "-th row " + columnCounter
1✔
854
                                    + "=th column float value:" + float_datum);
855
                            // This may be temporary - but for now (as in, while I'm testing 
856
                            // 4.0 ingest against 3.* ingest, I need to be able to tell if a 
857
                            // floating point value was a single, or double float in the 
858
                            // original STATA file: -- L.A. Jul. 2014
859
                            dataTable.getDataVariables().get(columnCounter).setFormat("float");
1✔
860
                            // ?
861
                        }
862

863
                    }
864
                    byte_offset += 4;
1✔
865
                } else if (varType.equals("Double")) { // STATA double 8 bytes
1✔
866

867
                    double double_datum = reader.readDouble();
1✔
868
                    if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
1✔
869
                        logger.finer(i + "-th row " + columnCounter
×
870
                                + "=th column double missing value=" + double_datum);
871
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
×
872
                    } else {
873

874
                        if (isDateTimeDatum) {
1✔
875
                            DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum));
×
876
                            logger.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
×
877
                            dataRow[columnCounter] = ddt.decodedDateTime;
×
878
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
×
879
                        } else {
×
880
                            logger.fine(i + "-th row " + columnCounter
1✔
881
                                    + "=th column double value:" + double_datum); //doubleNumberFormatter.format(double_datum));
882

883
                            dataRow[columnCounter] = double_datum; //doubleNumberFormatter.format(double_datum);
1✔
884
                        }
885

886
                    }
887
                    byte_offset += 8;
1✔
888
                } else if (varType.matches("^STR[1-9][0-9]*")) {
1✔
889
                    // String case
890
                    int strVarLength = variableByteLengths[columnCounter];
1✔
891
                    logger.fine(i + "-th row " + columnCounter
1✔
892
                            + "=th column is a string (" + strVarLength + " bytes)");
893
                    // In STATA13+, STRF strings *MUST*
894
                    // be limited to ASCII. UTF8 strings can be stored as 
895
                    // STRLs. 
896
                    String string_datum = reader.readString(strVarLength);
1✔
897
                    if (string_datum.equals("")) {
1✔
898

899
                        logger.fine(i + "-th row " + columnCounter
×
900
                                + "=th column string missing value=" + string_datum);
901

902
                        /* Note: 
903
                         * In Stata, an empty string ("") in a String vector is 
904
                         * the notation for a missing value.
905
                         * So in the resulting tab file it should be stored as such,
906
                         * and not as an empty string (that would be "\"\""). 
907
                         * (This of course means that it's simply not possible 
908
                         * to store actual empty strings in Stata)
909
                         */
910
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
×
911
                    } else {
912
                        /*
913
                         * Some special characters, like new lines and tabs need to 
914
                         * be escaped - otherwise they will break our TAB file 
915
                         * structure! 
916
                         */
917

918
                        dataRow[columnCounter] = escapeCharacterString(string_datum);
1✔
919
                    }
920
                    byte_offset += strVarLength;
1✔
921
                } else if (varType.equals("STRL")) {
1✔
922
                    logger.fine("STRL encountered.");
1✔
923

924
                    if (cachedGSOs == null) {
1✔
925
                        cachedGSOs = new LinkedHashMap<>();
1✔
926
                    }
927

928
                    // Reading the (v,o) pair: 
929
                    long v;
930
                    long o;
931
                    
932
                    if(DTAVersion == 117){
1✔
933
                        v = reader.readUInt();
×
934
                        byte_offset += 4;
×
935
                        o = reader.readUInt();
×
936
                        byte_offset += 4;
×
937
                    } else {
938
                        v = reader.readUShort();
1✔
939
                        byte_offset += 2;
1✔
940
                        o = reader.readULong(6);
1✔
941
                        byte_offset += 6;
1✔
942
                    }
943
                    // create v,o pair; save, for now:
944
                    String voPair = v + "," + o;
1✔
945
                    dataRow[columnCounter] = voPair;
1✔
946

947
                    // TODO: 
948
                    // would it make sense to validate v and o here? 
949
                    // Making sure v <= varNum and o < numbObs; 
950
                    // or, if o == numObs, v <= columnCounter; 
951
                    // -- per the Stata 13+ spec...
952
                    if (!(v == columnCounter + 1 && o == i + 1)) {
1✔
953
                        if (!cachedGSOs.containsKey(voPair)) {
×
954
                            cachedGSOs.put(voPair, "");
×
955
                            // this means we need to cache this GSO, when 
956
                            // we read the STRLS section later on. 
957
                        }
958
                    }
959

960
                } else {
1✔
961
                    logger.warning("unknown variable type found: " + varType);
×
962
                    String errorMessage
×
963
                            = "unknown variable type encounted when reading data section: " + varType;
964
                    throw new IOException(errorMessage);
×
965

966
                }
967
            } 
968

969
            if (byte_offset != bytes_per_row) {
1✔
970
                throw new IOException("Unexpected number of bytes read for data row " + i + "; " + bytes_per_row + " expected, " + byte_offset + " read.");
×
971
            }
972

973
            // Dump the row of data to the tab-delimited file:
974
            pwout.println(StringUtils.join(dataRow, "\t"));
1✔
975

976
            logger.fine("finished reading " + i + "-th row");
1✔
977

978
        }  // for (rows)
979

980
        pwout.close();
1✔
981

982
        reader.readClosingTag(TAG_DATA);
1✔
983
        logger.fine("NewDTA Ingest: readData(): end.");
1✔
984

985
    }
1✔
986

987
    /* 
988
     * STRLs: 
989
     */
990
    private void readSTRLs(DataReader reader) throws IOException {
991
        logger.fine("STRLs section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_strls());
1✔
992

993
        if (hasSTRLs) {
1✔
994
            reader.readOpeningTag(TAG_STRLS);
1✔
995

996
            File intermediateTabFile = ingesteddata.getTabDelimitedFile();
1✔
997
            FileInputStream fileInTab = new FileInputStream(intermediateTabFile);
1✔
998

999
            Scanner scanner = new Scanner(fileInTab);
1✔
1000
            scanner.useDelimiter("\\n");
1✔
1001

1002
            File finalTabFile = File.createTempFile("finalTabfile.", ".tab");
1✔
1003
            FileOutputStream fileOutTab = new FileOutputStream(finalTabFile);
1✔
1004
            PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);
1✔
1005

1006
            logger.fine("Setting the tab-delimited file to " + finalTabFile.getName());
1✔
1007
            ingesteddata.setTabDelimitedFile(finalTabFile);
1✔
1008

1009
            int nvar = dataTable.getVarQuantity().intValue();
1✔
1010
            int nobs = dataTable.getCaseQuantity().intValue();
1✔
1011

1012
            String[] line;
1013
            
1014
            //@todo: adjust for the case of storing the file with the variable header
1015

1016
            for (int obsindex = 0; obsindex < nobs; obsindex++) {
1✔
1017
                if (scanner.hasNext()) {
1✔
1018
                    line = (scanner.next()).split("\t", -1);
1✔
1019

1020
                    for (int varindex = 0; varindex < nvar; varindex++) {
1✔
1021
                        if ("STRL".equals(variableTypes[varindex])) {
1✔
1022
                            // this is a STRL; needs to be re-processed:
1023

1024
                            String voPair = line[varindex];
1✔
1025
                            long v;
1026
                            long o;
1027
                            if (voPair == null) {
1✔
1028
                                throw new IOException("Failed to read an intermediate v,o Pair for variable "
×
1029
                                        + varindex + ", observation " + obsindex);
1030
                            }
1031

1032
                            if ("0,0".equals(voPair)) {
1✔
1033
                                // This is a code for an empty string - "";
1034
                                // doesn't need to be defined or looked up.
1035

1036
                                line[varindex] = "\"\"";
×
1037
                            } else {
1038
                                String[] voTokens = voPair.split(",", 2);
1✔
1039

1040
                                try {
1041
                                    v = new Long(voTokens[0]);
1✔
1042
                                    o = new Long(voTokens[1]);
1✔
1043
                                } catch (NumberFormatException nfex) {
×
1044
                                    throw new IOException("Illegal v,o value: " + voPair + " for variable "
×
1045
                                            + varindex + ", observation " + obsindex);
1046
                                }
1✔
1047

1048
                                if (v == varindex + 1 && o == obsindex + 1) {
1✔
1049
                                    // This v,o must be defined in the STRLs section:
1050
                                    line[varindex] = readGSO(reader, v, o);
1✔
1051
                                    if (line[varindex] == null) {
1✔
1052
                                        throw new IOException("Failed to read GSO value for " + voPair);
×
1053
                                    }
1054

1055
                                } else {
1056
                                    // This one must have been cached already:
1057
                                    if (cachedGSOs.get(voPair) != null
×
1058
                                            && !cachedGSOs.get(voPair).equals("")) {
×
1059
                                        line[varindex] = cachedGSOs.get(voPair);
×
1060
                                    } else {
1061
                                        throw new IOException("GSO string unavailable for v,o value " + voPair);
×
1062
                                    }
1063
                                }
1064
                            }
1065
                        }
1066
                    }
1067
                    // Dump the row of data to the tab-delimited file:
1068
                    pwout.println(StringUtils.join(line, "\t"));
1✔
1069
                }
1070
            }
1071

1072
            scanner.close();
1✔
1073
            pwout.close();
1✔
1074

1075
            reader.readClosingTag(TAG_STRLS);
1✔
1076
        } else {
1✔
1077
            // If this data file doesn't use STRLs, we can just skip 
1078
            // this section, and assume that we are done with the 
1079
            // tabular data file.
1080
            reader.readPrimitiveSection(TAG_STRLS);
1✔
1081
        }
1082

1083
        //reader.readClosingTag(TAG_STRLS);
1084
    }
1✔
1085

1086
    private String readGSO(DataReader reader, long v, long o) throws IOException {
1087
        if (!reader.checkTag(STRL_GSO_HEAD)) {
1✔
1088
            return null;
×
1089
        }
1090

1091
        // Skipping the GSO header - fixed string "GSO":
1092
        reader.readBytes(STRL_GSO_HEAD.length());
1✔
1093

1094
        // Reading the stored (v,o) pair: 
1095
        long vStored = reader.readUInt();
1✔
1096
        long oStored = reader.readULong(DTAVersion == 117? 4: 8);
1✔
1097

1098
        String voPair = v + "," + o;
1✔
1099

1100
        if (vStored != v || oStored != o) {
1✔
1101
            throw new IOException("GSO reading mismatch: expected v,o pair: "
×
1102
                    + voPair + ", found: " + vStored + "," + oStored);
1103
        }
1104

1105
        short type = reader.readUByte();
1✔
1106
        boolean binary = false;
1✔
1107

1108
        if (type == 129) {
1✔
1109
            logger.fine("STRL TYPE: binary");
×
1110
            binary = true;
×
1111
        } else if (type == 130) {
1✔
1112
            logger.fine("STRL TYPE: ascii");
1✔
1113
        } else {
1114
            logger.warning("WARNING: unknown STRL type: " + type);
×
1115
        }
1116

1117
        long length = reader.readUInt();
1✔
1118

1119
        logger.fine("Advertised length of the STRL: " + length);
1✔
1120

1121
        // TODO:
1122
        // length can technically be 0 < length < 2^^32;
1123
        // but Java arrays are only [int], i.e., can only have < 2^^31
1124
        // elements; readBytes() allocates and returns a byte[] array.
1125
        // so I should probably check the value of length - if it 
1126
        // can fit into a signed int; not that it's likely to happen 
1127
        // in real life. Still, should we throw an exception here, if 
1128
        // this length is > 2^^31?
1129
        byte[] contents = reader.readBytes((int) length);
1✔
1130

1131
        String gsoString;
1132
        if (binary) {
1✔
1133
            gsoString = new String(contents, "utf8"); 
×
1134
        } else {
1135
            gsoString = new String(contents, 0, (int) length - 1, "US-ASCII");
1✔
1136
        }
1137

1138
        logger.fine("GSO " + v + "," + o + ": " + gsoString);
1✔
1139

1140
        String escapedGsoString = escapeCharacterString(gsoString);
1✔
1141

1142
        if (cachedGSOs.containsKey(voPair)) {
1✔
1143
            // We need to cache this GSO: 
1144
            if (!"".equals(cachedGSOs.get(voPair))) {
×
1145
                throw new IOException("Multiple GSO definitions for v,o " + voPair);
×
1146
            }
1147
            cachedGSOs.put(voPair, escapedGsoString);
×
1148
        }
1149

1150
        return escapedGsoString;
1✔
1151
    }
1152

1153
    private void readValueLabels(DataReader reader) throws IOException {
1154
        logger.fine("Value Labels section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_vallabs());
1✔
1155
        logger.fine("readValueLabels(): start.");
1✔
1156

1157
        reader.readOpeningTag(TAG_VALUE_LABELS);
1✔
1158

1159
        while (reader.checkTag("<" + TAG_VALUE_LABELS_LBL_DEF + ">")) {
1✔
1160
            reader.readOpeningTag(TAG_VALUE_LABELS_LBL_DEF);
1✔
1161
            long label_table_length = reader.readUInt();
1✔
1162

1163
            String label_table_name = reader.readString(DTAVersion == 117? 33: 129);
1✔
1164
            
1165
            reader.readBytes(3); 
1✔
1166

1167
            // read the value_label_table that follows. 
1168
            // should be label_table_length. 
1169
            int number_of_categories = (int) reader.readUInt();
1✔
1170
            long text_length = reader.readUInt();
1✔
1171

1172
            long value_category_offset = 8;
1✔
1173

1174
            long[] value_label_offsets = new long[number_of_categories];
1✔
1175
            long[] value_label_offsets_sorted = null; 
1✔
1176
            long[] category_values = new long[number_of_categories];
1✔
1177
            String[] category_value_labels = new String[number_of_categories];
1✔
1178

1179
            boolean alreadySorted = true;
1✔
1180
            
1181
            for (int i = 0; i < number_of_categories; i++) {
1✔
1182
                value_label_offsets[i] = reader.readUInt();
1✔
1183
                logger.fine("offset " + i + ": " + value_label_offsets[i]);
1✔
1184
                value_category_offset += 4;
1✔
1185
                if (i > 0 && value_label_offsets[i] < value_label_offsets[i-1]) {
1✔
1186
                    alreadySorted = false;
×
1187
                }
1188
            }
1189

1190
            if (!alreadySorted) {
1✔
1191
                //value_label_offsets_sorted = new long[number_of_categories];
1192
                value_label_offsets_sorted = Arrays.copyOf(value_label_offsets, number_of_categories);
×
1193
                Arrays.sort(value_label_offsets_sorted);
×
1194
            }
1195

1196
            for (int i = 0; i < number_of_categories; i++) {
1✔
1197
                category_values[i] = reader.readInt();
1✔
1198
                value_category_offset += 4;
1✔
1199
            }
1200

1201
            int total_label_bytes = 0;
1✔
1202

1203
            long label_offset;
1204
            long label_end;
1205
            int label_length;
1206

1207
            // Read the remaining bytes in this <lbl> section. 
1208
            // This byte[] array will contain all the value labels for the
1209
            // variable. Each is terminated by the binary zero byte; so we 
1210
            // can read the bytes for each label at the defined offset until 
1211
            // we encounter \000. Or we can rely on the (sorted) list of offsets
1212
            // to determine where each label ends (implemented below). 
1213
            byte[] labelBytes = null;
1✔
1214
            if((int)text_length != 0) { //If length is 0 we don't need to read any bytes
1✔
1215
                labelBytes = reader.readBytes((int)text_length);
1✔
1216
            }
1217
            
1218
            for (int i = 0; i < number_of_categories; i++) {
1✔
1219
                label_offset = value_label_offsets[i];
1✔
1220

1221
                if (value_label_offsets_sorted == null) {
1✔
1222
                    label_end = i < number_of_categories - 1 ? value_label_offsets[i + 1] : text_length;
1✔
1223
                } else {
1224
                    int sortedPos = Arrays.binarySearch(value_label_offsets_sorted, label_offset);
×
1225
                    label_end = sortedPos < number_of_categories - 1 ? value_label_offsets_sorted[sortedPos + 1] : text_length;
×
1226
                }
1227
                label_length = (int)(label_end - label_offset);
1✔
1228

1229
                category_value_labels[i] = new String(Arrays.copyOfRange(labelBytes, (int)label_offset, (int)label_end-1), "UTF8");
1✔
1230
                total_label_bytes += label_length;
1✔
1231
            }
1232

1233
            value_category_offset += total_label_bytes;
1✔
1234

1235
            logger.fine("text_length: " + text_length);
1✔
1236
            logger.fine("total_label_bytes: " + total_label_bytes);
1✔
1237
            if (total_label_bytes != text_length) {
1✔
1238
                throw new IOException("<read mismatch in readLabels()>");
×
1239
            }
1240

1241
            if (value_category_offset != label_table_length) {
1✔
1242
                throw new IOException("<read mismatch in readLabels() 2>");
×
1243
            }
1244
            reader.readClosingTag(TAG_VALUE_LABELS_LBL_DEF);
1✔
1245
            
1246
            List<DataVariable> dataVariables = dataTable.getDataVariables();
1✔
1247
            // Find the variables that may be linking to this Category Values Table 
1248
            // and create VariableCategory objects for the corresponding 
1249
            // DataVariables: 
1250
            for (int i = 0; i < dataVariables.size(); i++) {
1✔
1251
                DataVariable dataVariable = dataVariables.get(i);
1✔
1252
                if (label_table_name.equals(valueLabelsLookupTable[i])) {
1✔
1253
                    logger.fine("cross-linking value label table for " + label_table_name);
1✔
1254
                    
1255
                    for (int j = 0; j < number_of_categories; j++) {
1✔
1256
                        VariableCategory cat = new VariableCategory();
1✔
1257

1258
                        long cat_value = category_values[j];
1✔
1259
                        String cat_label = category_value_labels[j];
1✔
1260

1261
                        cat.setValue("" + cat_value);
1✔
1262
                        cat.setLabel(cat_label);
1✔
1263

1264
                        /* cross-link the variable and category to each other: */
1265
                        cat.setDataVariable(dataVariable);
1✔
1266
                        dataVariable.getCategories().add(cat);
1✔
1267
                    }
1268
                }
1269
            }
1270
        }
1✔
1271

1272
        reader.readClosingTag(TAG_VALUE_LABELS);
1✔
1273
        logger.fine("readValueLabels(): end.");
1✔
1274

1275
    }
1✔
1276

1277
    /*
1278
     * Helper methods for decoding data:
1279
     */
1280
    private int calculateBytesPerRow(int[] variableByteLengths) throws IOException {
1281
        if (variableByteLengths == null || variableByteLengths.length != dataTable.getVarQuantity()) {
1✔
1282
            throw new IOException("<internal variable byte offsets table not properly configured>");
×
1283
        }
1284
        int bytes_per_row = 0;
1✔
1285

1286
        for (int i = 0; i < dataTable.getVarQuantity(); i++) {
1✔
1287
            if (variableByteLengths[i] < 1) {
1✔
1288
                throw new IOException("<bad variable byte offset: " + variableByteLengths[i] + ">");
×
1289
            }
1290
            bytes_per_row += variableByteLengths[i];
1✔
1291
        }
1292

1293
        return bytes_per_row;
1✔
1294
    }
1295

1296
    private int[] getVariableByteLengths(String[] variableTypes) throws IOException {
1297
        if (variableTypes == null || variableTypes.length != dataTable.getVarQuantity()) {
1✔
1298
            throw new IOException("<internal variable types not properly configured>");
×
1299
        }
1300

1301
        int[] variableByteLengths = new int[dataTable.getVarQuantity().intValue()];
1✔
1302

1303
        for (int i = 0; i < dataTable.getVarQuantity(); i++) {
1✔
1304
            variableByteLengths[i] = getVariableByteLength(variableTypes[i]);
1✔
1305
        }
1306

1307
        return variableByteLengths;
1✔
1308
    }
1309

1310
    private int getVariableByteLength(String variableType) throws IOException {
1311
        int byte_length = 0;
1✔
1312

1313
        if (variableType == null || variableType.equals("")) {
1✔
1314
            throw new IOException("<empty variable type in attempted byte length lookup.>");
×
1315
        }
1316
        if (byteLengthTable.containsKey(variableType)) {
1✔
1317
            return byteLengthTable.get(variableType);
1✔
1318
        }
1319

1320
        if (variableType.matches("^STR[1-9][0-9]*")) {
1✔
1321
            String stringLengthToken = variableType.substring(3);
1✔
1322
            Integer stringLength;
1323
            try {
1324
                stringLength = new Integer(stringLengthToken);
1✔
1325
            } catch (NumberFormatException nfe) {
×
1326
                stringLength = null;
×
1327
            }
1✔
1328
            if (stringLength == null || stringLength < 1 || stringLength > 2045) {
1✔
1329
                throw new IOException("Invalid STRF encountered: " + variableType);
×
1330
            }
1331
            return stringLength;
1✔
1332
        }
1333

1334
        throw new IOException("Unknown/invalid variable type: " + variableType);
×
1335
    }
1336

1337
    private class DecodedDateTime {
1✔
1338

1339
        String format;
1340
        String decodedDateTime;
1341
    }
1342

1343
    private DecodedDateTime decodeDateTimeData(String storageType, String FormatType, String rawDatum) throws IOException {
1344

1345
        logger.fine("(storageType, FormatType, rawDatum)=("
1✔
1346
                + storageType + ", " + FormatType + ", " + rawDatum + ")");
1347
        /*
1348
         *         Historical note:
1349
                   pseudofunctions,  td(), tw(), tm(), tq(), and th()
1350
                used to be called     d(),  w(),  m(),  q(), and  h().
1351
                Those names still work but are considered anachronisms.
1352

1353
         */
1354

1355
        long milliSeconds;
1356
        String decodedDateTime;
1357
        String format;
1358

1359
        if (FormatType.matches("^%tc.*")) {
1✔
1360
            // tc is a relatively new format
1361
            // datum is millisecond-wise
1362
            milliSeconds = Math.round(new Double(rawDatum)) + STATA_BIAS_TO_EPOCH;
1✔
1363
            decodedDateTime = sdf_ymdhmsS.format(new Date(milliSeconds));
1✔
1364
            format = sdf_ymdhmsS.toPattern();
1✔
1365
            logger.fine("tc: result=" + decodedDateTime + ", format = " + format);
1✔
1366

1367
        } else if (FormatType.matches("^%t?d.*")) {
1✔
1368
            milliSeconds = Math.round(new Double(rawDatum)) * MILLISECCONDS_PER_DAY + STATA_BIAS_TO_EPOCH;
1✔
1369
            logger.fine("milliSeconds=" + milliSeconds);
1✔
1370

1371
            decodedDateTime = sdf_ymd.format(new Date(milliSeconds));
1✔
1372
            format = sdf_ymd.toPattern();
1✔
1373
            logger.fine("td:" + decodedDateTime + ", format = " + format);
1✔
1374

1375
        } else if (FormatType.matches("^%t?w.*")) {
1✔
1376

1377
            long weekYears = Math.round(new Double(rawDatum));
1✔
1378
            long left = Math.abs(weekYears) % 52L;
1✔
1379
            long years;
1380
            if (weekYears < 0L) {
1✔
1381
                left = 52L - left;
×
1382
                if (left == 52L) {
×
1383
                    left = 0L;
×
1384
                }
1385
                //out.println("left="+left);
1386
                years = (Math.abs(weekYears) - 1) / 52L + 1L;
×
1387
                years *= -1L;
×
1388
            } else {
1389
                years = weekYears / 52L;
1✔
1390
            }
1391

1392
            String yearString = Long.toString(1960L + years);
1✔
1393
            String dayInYearString = new DecimalFormat("000").format((left * 7) + 1);
1✔
1394
            String yearDayInYearString = yearString + "-" + dayInYearString;
1✔
1395

1396
            Date tempDate = null;
1✔
1397
            try {
1398
                tempDate = new SimpleDateFormat("yyyy-DDD").parse(yearDayInYearString);
1✔
1399
            } catch (ParseException ex) {
×
1400
                throw new IOException(ex);
×
1401
            }
1✔
1402

1403
            decodedDateTime = sdf_ymd.format(tempDate.getTime());
1✔
1404
            format = sdf_ymd.toPattern();
1✔
1405

1406
        } else if (FormatType.matches("^%t?m.*")) {
1✔
1407
            // month 
1408
            long monthYears = Math.round(new Double(rawDatum));
1✔
1409
            long left = Math.abs(monthYears) % 12L;
1✔
1410
            long years;
1411
            if (monthYears < 0L) {
1✔
1412
                left = 12L - left;
×
1413
                //out.println("left="+left);
1414
                years = (Math.abs(monthYears) - 1) / 12L + 1L;
×
1415
                years *= -1L;
×
1416
            } else {
1417
                years = monthYears / 12L;
1✔
1418
            }
1419

1420
            if (left == 12L) {
1✔
1421
                left = 0L;
×
1422
            }
1423
            Long monthdata = (left + 1);
1✔
1424
            String month = "-" + twoDigitFormatter.format(monthdata) + "-01";
1✔
1425
            long year = 1960L + years;
1✔
1426
            String monthYear = year + month;
1✔
1427
            logger.fine("rawDatum=" + rawDatum + ": monthYear=" + monthYear);
1✔
1428

1429
            decodedDateTime = monthYear;
1✔
1430
            format = "yyyy-MM-dd";
1✔
1431
            logger.fine("tm:" + decodedDateTime + ", format:" + format);
1✔
1432

1433
        } else if (FormatType.matches("^%t?q.*")) {
1✔
1434
            // quarter
1435
            long quarterYears = Math.round(new Double(rawDatum));
1✔
1436
            long left = Math.abs(quarterYears) % 4L;
1✔
1437
            long years;
1438
            if (quarterYears < 0L) {
1✔
1439
                left = 4L - left;
×
1440
                //out.println("left="+left);
1441
                years = (Math.abs(quarterYears) - 1) / 4L + 1L;
×
1442
                years *= -1L;
×
1443
            } else {
1444
                years = quarterYears / 4L;
1✔
1445
            }
1446

1447
            String quarter = null;
1✔
1448

1449
            if ((left == 0L) || (left == 4L)) {
1✔
1450
                //quarter ="q1"; //
1451
                quarter = "-01-01";
1✔
1452
            } else if (left == 1L) {
1✔
1453
                //quarter = "q2"; //
1454
                quarter = "-04-01";
1✔
1455
            } else if (left == 2L) {
1✔
1456
                //quarter = "q3"; //
1457
                quarter = "-07-01";
1✔
1458
            } else if (left == 3L) {
1✔
1459
                //quarter = "q4"; //
1460
                quarter = "-11-01";
1✔
1461
            }
1462

1463
            long year = 1960L + years;
1✔
1464
            String quarterYear = Long.toString(year) + quarter;
1✔
1465
            logger.fine("rawDatum=" + rawDatum + ": quarterYear=" + quarterYear);
1✔
1466

1467
            decodedDateTime = quarterYear;
1✔
1468
            format = "yyyy-MM-dd";
1✔
1469
            logger.fine("tq:" + decodedDateTime + ", format:" + format);
1✔
1470

1471
        } else if (FormatType.matches("^%t?h.*")) {
1✔
1472
            // half year
1473
            // odd number:2nd half
1474
            // even number: 1st half
1475

1476
            long halvesYears = Math.round(new Double(rawDatum));
1✔
1477
            long left = Math.abs(halvesYears) % 2L;
1✔
1478
            long years;
1479
            if (halvesYears < 0L) {
1✔
1480
                years = (Math.abs(halvesYears) - 1) / 2L + 1L;
×
1481
                years *= -1L;
×
1482
            } else {
1483
                years = halvesYears / 2L;
1✔
1484
            }
1485

1486
            String half;
1487
            if (left != 0L) {
1✔
1488
                // odd number => 2nd half: "h2"
1489
                //half ="h2"; //
1490
                half = "-07-01";
1✔
1491
            } else {
1492
                // even number => 1st half: "h1"
1493
                //half = "h1"; //
1494
                half = "-01-01";
1✔
1495
            }
1496
            long year = 1960L + years;
1✔
1497
            String halfYear = Long.toString(year) + half;
1✔
1498
            logger.fine("rawDatum=" + rawDatum + ": halfYear=" + halfYear);
1✔
1499

1500
            decodedDateTime = halfYear;
1✔
1501
            format = "yyyy-MM-dd";
1✔
1502
            logger.fine("th:" + decodedDateTime + ", format:" + format);
1✔
1503

1504
        } else if (FormatType.matches("^%t?y.*")) {
1✔
1505
            // year type's origin is 0 AD
1506
            decodedDateTime = rawDatum;
1✔
1507
            format = "yyyy";
1✔
1508
            logger.fine("th:" + decodedDateTime);
1✔
1509
        } else {
1510
            decodedDateTime = rawDatum;
×
1511
            format = null;
×
1512
        }
1513
        DecodedDateTime retValue = new DecodedDateTime();
1✔
1514
        retValue.decodedDateTime = decodedDateTime;
1✔
1515
        retValue.format = format;
1✔
1516
        return retValue;
1✔
1517
    }
1518

1519
    private class DTADataMap {
1✔
1520

1521
        private long dta_offset_stata_data = 0;
1✔
1522
        private long dta_offset_map = 0;
1✔
1523
        private long dta_offset_variable_types = 0;
1✔
1524
        private long dta_offset_varnames = 0;
1✔
1525
        private long dta_offset_sortlist = 0;
1✔
1526
        private long dta_offset_formats = 0;
1✔
1527
        private long dta_offset_value_label_names = 0;
1✔
1528
        private long dta_offset_variable_labels = 0;
1✔
1529
        private long dta_offset_characteristics = 0;
1✔
1530
        private long dta_offset_data = 0;
1✔
1531
        private long dta_offset_strls = 0;
1✔
1532
        private long dta_offset_value_labels = 0;
1✔
1533
        private long dta_offset_data_close = 0;
1✔
1534
        private long dta_offset_eof = 0;
1✔
1535

1536
        // getters:
1537
        public long getOffset_head() {
1538
            return dta_offset_stata_data;
×
1539
        }
1540

1541
        public long getOffset_map() {
1542
            return dta_offset_map;
×
1543
        }
1544

1545
        public long getOffset_types() {
1546
            return dta_offset_variable_types;
1✔
1547
        }
1548

1549
        public long getOffset_varnames() {
1550
            return dta_offset_varnames;
1✔
1551
        }
1552

1553
        public long getOffset_srtlist() {
1554
            return dta_offset_sortlist;
1✔
1555
        }
1556

1557
        public long getOffset_fmts() {
1558
            return dta_offset_formats;
1✔
1559
        }
1560

1561
        public long getOffset_vlblnames() {
1562
            return dta_offset_value_label_names;
1✔
1563
        }
1564

1565
        public long getOffset_varlabs() {
1566
            return dta_offset_variable_labels;
1✔
1567
        }
1568

1569
        public long getOffset_characteristics() {
1570
            return dta_offset_characteristics;
1✔
1571
        }
1572

1573
        public long getOffset_data() {
1574
            return dta_offset_data;
1✔
1575
        }
1576

1577
        public long getOffset_strls() {
1578
            return dta_offset_strls;
1✔
1579
        }
1580

1581
        public long getOffset_vallabs() {
1582
            return dta_offset_value_labels;
1✔
1583
        }
1584

1585
        public long getOffset_data_close() {
1586
            return dta_offset_data_close;
×
1587
        }
1588

1589
        public long getOffset_eof() {
1590
            return dta_offset_eof;
×
1591
        }
1592

1593
        // setters: 
1594
        public void setOffset_head(long dta_offset_stata_data) {
1595
            this.dta_offset_stata_data = dta_offset_stata_data;
1✔
1596
        }
1✔
1597

1598
        public void setOffset_map(long dta_offset_map) {
1599
            this.dta_offset_map = dta_offset_map;
1✔
1600
        }
1✔
1601

1602
        public void setOffset_types(long dta_offset_variable_types) {
1603
            this.dta_offset_variable_types = dta_offset_variable_types;
1✔
1604
        }
1✔
1605

1606
        public void setOffset_varnames(long dta_offset_varnames) {
1607
            this.dta_offset_varnames = dta_offset_varnames;
1✔
1608
        }
1✔
1609

1610
        public void setOffset_srtlist(long dta_offset_sortlist) {
1611
            this.dta_offset_sortlist = dta_offset_sortlist;
1✔
1612
        }
1✔
1613

1614
        public void setOffset_fmts(long dta_offset_formats) {
1615
            this.dta_offset_formats = dta_offset_formats;
1✔
1616
        }
1✔
1617

1618
        public void setOffset_vlblnames(long dta_offset_value_label_names) {
1619
            this.dta_offset_value_label_names = dta_offset_value_label_names;
1✔
1620
        }
1✔
1621

1622
        public void setOffset_varlabs(long dta_offset_variable_labels) {
1623
            this.dta_offset_variable_labels = dta_offset_variable_labels;
1✔
1624
        }
1✔
1625

1626
        public void setOffset_characteristics(long dta_offset_characteristics) {
1627
            this.dta_offset_characteristics = dta_offset_characteristics;
1✔
1628
        }
1✔
1629

1630
        public void setOffset_data(long dta_offset_data) {
1631
            this.dta_offset_data = dta_offset_data;
1✔
1632
        }
1✔
1633

1634
        public void setOffset_strls(long dta_offset_strls) {
1635
            this.dta_offset_strls = dta_offset_strls;
1✔
1636
        }
1✔
1637

1638
        public void setOffset_vallabs(long dta_offset_value_labels) {
1639
            this.dta_offset_value_labels = dta_offset_value_labels;
1✔
1640
        }
1✔
1641

1642
        public void setOffset_data_close(long dta_offset_data_close) {
1643
            this.dta_offset_data_close = dta_offset_data_close;
1✔
1644
        }
1✔
1645

1646
        public void setOffset_eof(long dta_offset_eof) {
1647
            this.dta_offset_eof = dta_offset_eof;
1✔
1648
        }
1✔
1649
    }
1650
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc