• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #22987

23 Aug 2024 06:44PM CUT coverage: 20.61% (-0.2%) from 20.791%
#22987

Pull #10781

github

landreev
added an upfront locks check to the /addGlobusFiles api #10623
Pull Request #10781: Improved handling of Globus uploads

4 of 417 new or added lines in 15 files covered. (0.96%)

4194 existing lines in 35 files now uncovered.

17388 of 84365 relevant lines covered (20.61%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

54.97
/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java
1
/*
2
   Copyright (C) 2005-2012, by the President and Fellows of Harvard College.
3

4
   Licensed under the Apache License, Version 2.0 (the "License");
5
   you may not use this file except in compliance with the License.
6
   You may obtain a copy of the License at
7

8
         http://www.apache.org/licenses/LICENSE-2.0
9

10
   Unless required by applicable law or agreed to in writing, software
11
   distributed under the License is distributed on an "AS IS" BASIS,
12
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
   See the License for the specific language governing permissions and
14
   limitations under the License.
15

16
   Dataverse Network - A web application to share, preserve and analyze research data.
17
   Developed at the Institute for Quantitative Social Science, Harvard University.
18
   Version 3.0.
19
*/
20
package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav;
21

22
import java.io.BufferedInputStream;
23
import java.io.File;
24
import java.io.FileNotFoundException;
25
import java.io.FileOutputStream;
26
import java.io.IOException;
27
import java.io.OutputStreamWriter;
28
import java.io.PrintWriter;
29
import java.io.UnsupportedEncodingException;
30
import java.nio.ByteBuffer;
31
import java.nio.ByteOrder;
32

33
import java.text.DecimalFormat;
34
import java.text.NumberFormat;
35
import java.text.SimpleDateFormat;
36
import java.util.ArrayList;
37
import java.util.Arrays;
38
import java.util.Calendar;
39
import java.util.Date;
40
import java.util.GregorianCalendar;
41
import java.util.HashMap;
42
import java.util.HashSet;
43
import java.util.Iterator;
44
import java.util.LinkedHashMap;
45
import java.util.LinkedHashSet;
46
import java.util.List;
47
import java.util.Map;
48
import java.util.Set;
49
import java.util.TimeZone;
50
import java.util.logging.Logger;
51
import java.util.regex.Matcher;
52
import java.util.regex.Pattern;
53

54

55
import org.apache.commons.codec.binary.Hex;
56
import org.apache.commons.lang3.ArrayUtils;
57
import org.apache.commons.lang3.StringUtils;
58

59
import edu.harvard.iq.dataverse.DataTable;
60
import edu.harvard.iq.dataverse.datavariable.DataVariable;
61
import edu.harvard.iq.dataverse.datavariable.SummaryStatistic;
62
import edu.harvard.iq.dataverse.datavariable.VariableCategory;
63
import edu.harvard.iq.dataverse.datavariable.VariableRange;
64

65
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader;
66
import edu.harvard.iq.dataverse.ingest.tabulardata.spi.TabularDataFileReaderSpi;
67
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest;
68
import edu.harvard.iq.dataverse.ingest.tabulardata.InvalidData;
69

70

71
/**
72
 * ingest plugin for SPSS SAV file format.
73
 *
74
 * This reader plugin has been fully re-implemented for the DVN 4.0;
75
 * It still borrows heavily from, and builds on the basis of the 
76
 * old implementation by Akio Sone, that was in use in the versions 
77
 * 2-3 of the DVN.
78
 * 
79
 * @author Akio Sone at UNC-Odum
80
 * @author Leonid Andreev
81
 */
82

83
public class SAVFileReader  extends TabularDataFileReader{
84
    
85
    // static fields ---------------------------------------------------------//
86
    private static String[] FORMAT_NAMES = {"sav", "SAV"};
1✔
87
    private static String[] EXTENSIONS = {"sav"};
1✔
88
    private static String[] MIME_TYPE = {"application/x-spss-sav"};
1✔
89

90
    private static final int LENGTH_SAV_INT_BLOCK = 4;
91
    // note: OBS block is either double or String, not Integer
92
    private static final int LENGTH_SAV_OBS_BLOCK = 8;
93
    
94
    private static final int SAV_MAGIC_NUMBER_LENGTH = LENGTH_SAV_INT_BLOCK;
95
    
96
    private static String SAV_FILE_SIGNATURE = "$FL2";
1✔
97

98
    
99
    
100
    // Record Type 1 fields
101
    private static final int LENGTH_RECORDTYPE1 = 172;
102
    
103
    private static final int LENGTH_SPSS_PRODUCT_INFO = 60;
104
    
105
    private static final int FILE_LAYOUT_CONSTANT = 2;
106
    
107
    private static final int LENGTH_FILE_LAYOUT_CODE =  LENGTH_SAV_INT_BLOCK;
108
    
109
    private static final int LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE = LENGTH_SAV_INT_BLOCK;
110
    
111
    private static final int LENGTH_COMPRESSION_SWITCH = LENGTH_SAV_INT_BLOCK;
112
    
113
    private static final int LENGTH_CASE_WEIGHT_VARIABLE_INDEX = LENGTH_SAV_INT_BLOCK;
114
    
115
    private static final int LENGTH_NUMBER_OF_CASES =   LENGTH_SAV_INT_BLOCK;
116
    
117
    private static final int LENGTH_COMPRESSION_BIAS =  LENGTH_SAV_OBS_BLOCK;
118
    
119
    private static final int LENGTH_FILE_CREATION_INFO = 84;
120
    
121
    private static final int length_file_creation_date = 9;
122
    private static final int length_file_creation_time = 8;
123
    private static final int length_file_creation_label= 64;
124
    private static final int length_file_creation_padding = 3;
125
    
126
    // Record Type 2
127
    
128
    private static final int LENGTH_RECORDTYPE2_FIXED = 32;
129
    private static final int LENGTH_RECORD_TYPE2_CODE = 4;
130
    private static final int LENGTH_TYPE_CODE = 4;
131
    private static final int LENGTH_LABEL_FOLLOWS = 4;
132
    private static final int LENGTH_MISS_VALUE_FORMAT_CODE= 4;
133
    private static final int LENGTH_PRINT_FORMAT_CODE = 4;;
134
    private static final int LENGTH_WRITE_FORMAT_CODE = 4;
135
    private static final int LENGTH_VARIABLE_NAME =  8;
136
    private static final int LENGTH_VARIABLE_LABEL= 4;
137

138
    private static final int LENGTH_MISS_VAL_OBS_CODE = LENGTH_SAV_OBS_BLOCK;
139
    
140
    // Record Type 3/4
141
    private static final int LENGTH_RECORDTYPE3_HEADER_CODE = 4;
142
    private static final int LENGTH_RECORD_TYPE3_CODE = 4;
143
    private static final int LENGTH_RT3_HOW_MANY_LABELS = 4;
144
    private static final int LENGTH_RT3_VALUE  = LENGTH_SAV_OBS_BLOCK;
145
    private static final int LENGTH_RT3_LABEL_LENGTH =1;
146
    
147
    private static final int LENGTH_RECORD_TYPE4_CODE =      4;
148
    private static final int LENGTH_RT4_HOW_MANY_VARIABLES = 4;
149
    private static final int LENGTH_RT4_VARIABLE_INDEX =     4;
150
    
151
    // Record Type 6
152
    private static final int LENGTH_RECORD_TYPE6_CODE =  4;
153
    private static final int LENGTH_RT6_HOW_MANY_LINES = 4;
154
    private static final int LENGTH_RT6_DOCUMENT_LINE = 80;
155
    
156
    // Record Type 7
157
    private static final int LENGTH_RECORD_TYPE7_CODE =  4;
158
    private static final int LENGTH_RT7_SUB_TYPE_CODE =  4;
159

160
    // Record Type 999
161
    private static final int LENGTH_RECORD_TYPE999_CODE =  4;
162
    private static final int LENGTH_RT999_FILLER        =  4;
163

164
    
165
    private static final List<String> RecordType7SubType4Fields= new ArrayList<String>();
1✔
166
    private static final Set<Integer> validMissingValueCodeSet = new HashSet<Integer>();
1✔
167
    private static final Map<Integer, Integer> missingValueCodeUnits = new HashMap<Integer, Integer>();
1✔
168

169
    private static double SYSMIS_LITTLE =0xFFFFFFFFFFFFEFFFL;
1✔
170
    private static double SYSMIS_BIG =0xFFEFFFFFFFFFFFFFL;
1✔
171
    
172
    private static Calendar GCO = new GregorianCalendar();
1✔
173
    
174
    private String[] dateFormatList;
175

176
    static {

        // Populate validMissingValueCodeSet (insertion order kept as before).
        for (int code : new int[] {3, 2, 1, 0, -2, -3}) {
            validMissingValueCodeSet.add(code);
        }

        // Populate missingValueCodeUnits: every code is mapped to its
        // absolute value (insertion order kept as before).
        for (int code : new int[] {1, 2, 3, -2, -3, 0}) {
            missingValueCodeUnits.put(code, Math.abs(code));
        }

        RecordType7SubType4Fields.addAll(Arrays.asList("SYSMIS", "HIGHEST", "LOWEST"));

        // set the origin of GCO to 1582-10-15, 00:00:00.000, zero zone offset
        GCO.set(Calendar.YEAR, 1582);
        GCO.set(Calendar.MONTH, Calendar.OCTOBER);
        GCO.set(Calendar.DAY_OF_MONTH, 15);
        GCO.set(Calendar.AM_PM, Calendar.AM);
        GCO.set(Calendar.HOUR, 0);
        GCO.set(Calendar.MINUTE, 0);
        GCO.set(Calendar.SECOND, 0);
        GCO.set(Calendar.MILLISECOND, 0);
        GCO.set(Calendar.ZONE_OFFSET, 0);
    }
210

211
    private static final long SPSS_DATE_BIAS = 60*60*24*1000;
212

213
    private static final long SPSS_DATE_OFFSET = SPSS_DATE_BIAS + Math.abs(GCO.getTimeInMillis());
1✔
214

215

216
   // instance fields -------------------------------------------------------//
217
    private static String unfVersionNumber = "6";
1✔
218

219
    // instance fields -------------------------------------------------------//
220

221
    private static Logger dbgLog = Logger.getLogger(SAVFileReader.class.getPackage().getName());
1✔
222

223
    
224
    TabularDataIngest ingesteddata = new TabularDataIngest();
1✔
225
    private DataTable dataTable = new DataTable();
1✔
226
    
227
    Map<String, String> shortToLongVariableNameTable = new LinkedHashMap<String, String>();
1✔
228
    Map<String, String> formatCategoryTable = new LinkedHashMap<String, String>(); 
1✔
229

230

231

232
    private boolean isLittleEndian = false;     
1✔
233
    private boolean isDataSectionCompressed = true; 
1✔
234

235
    private Map<Integer, String> OBSIndexToVariableName =
1✔
236
        new LinkedHashMap<Integer, String>(); 
237
    
238
    private int OBSUnitsPerCase; 
239
    
240
    private List<Integer> variableTypelList= new ArrayList<Integer>(); 
1✔
241
    private List<Integer> OBSwiseTypelList= new ArrayList<Integer>(); 
1✔
242

243
    Map<String, String> printFormatTable = new LinkedHashMap<String, String>(); 
1✔
244
    
245

246
    Set<Integer> obsNonVariableBlockSet = new LinkedHashSet<Integer>(); 
1✔
247
    
248

249
    Map<String, String> valueVariableMappingTable = new LinkedHashMap<String, String>(); 
1✔
250
 
251
    Map<String, Integer> extendedVariablesSizeTable = new LinkedHashMap<String, Integer>();
1✔
252

253

254
    List<String> variableNameList = new ArrayList<String>(); 
1✔
255

256

257
    Map<String, InvalidData> invalidDataTable = new LinkedHashMap<String, InvalidData>(); // this variable used in 2 methods; only one uses it to set the smd value -- ??
1✔
258

259
    NumberFormat doubleNumberFormatter = new DecimalFormat();
1✔
260

261
    Set<Integer> decimalVariableSet = new HashSet<Integer>(); 
1✔
262
    
263
    String[] variableFormatTypeList= null; 
1✔
264

265
    List<Integer> formatDecimalPointPositionList= new ArrayList<Integer>(); 
1✔
266
  
267

268
    int caseWeightVariableOBSIndex = 0; 
1✔
269
    
270

271
    // date/time data formats
272

273
    private SimpleDateFormat sdf_ymd    = new SimpleDateFormat("yyyy-MM-dd");
1✔
274
    private SimpleDateFormat sdf_ymdhms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
1✔
275
    private SimpleDateFormat sdf_dhms   = new SimpleDateFormat("DDD HH:mm:ss");
1✔
276
    private SimpleDateFormat sdf_hms    = new SimpleDateFormat("HH:mm:ss");
1✔
277

278

279
    Map<String, String> OBSTypeHexValue = new LinkedHashMap<String, String>();    
1✔
280
    
281
    /*
282
     * TODO: add a comment explaining the whole thing about this default
283
     * character set. -- L.A. 4.0 beta
284
    */
285
    private String defaultCharSet = "ISO-8859-1";
1✔
286
    //private String defaultCharSet = "US-ASCII"; // -- temporary! -- 4.0 beta 6
287
    private int    spssVersionNumber = 0; 
1✔
288

289

290
    /**
291
     * The <code>String</code> that represents the numeric missing value 
292
     * in the final tab-delimited data file.
293
     */
294
    private String MissingValueForTextDataFileNumeric = "";
1✔
295

296
    
297
    public String getMissingValueForTextDataFileNumeric() {
UNCOV
298
        return MissingValueForTextDataFileNumeric;
×
299
    }
300

301
    
302
    public void setMissingValueForTextDataFileNumeric(String MissingValueToken) {
UNCOV
303
        this.MissingValueForTextDataFileNumeric = MissingValueToken;
×
UNCOV
304
    }
×
305

306

307
    String MissingValueForTextDataFileString = "";
1✔
308

309
    
310
    public String getMissingValueForTextDataFileString() {
UNCOV
311
        return MissingValueForTextDataFileString;
×
312
    }
313

314
    
315
    public void setMissingValueForTextDataFileString(String MissingValueToken) {
UNCOV
316
        this.MissingValueForTextDataFileString = MissingValueToken;
×
UNCOV
317
    }
×
318

319
    
320
    /**
     * Creates a SAV reader and hands the originating service provider
     * to the superclass constructor.
     *
     * @param originator the service provider passed on to the superclass
     */
    public SAVFileReader(TabularDataFileReaderSpi originator) {
        super(originator);
    }
1✔
323

324
    // Methods ---------------------------------------------------------------//
325

326
    private void init() throws IOException {
327
        
328
        sdf_ymd.setTimeZone(TimeZone.getTimeZone("GMT"));
1✔
329
        sdf_ymdhms.setTimeZone(TimeZone.getTimeZone("GMT"));
1✔
330
        sdf_dhms.setTimeZone(TimeZone.getTimeZone("GMT"));
1✔
331
        sdf_hms.setTimeZone(TimeZone.getTimeZone("GMT"));
1✔
332
                
333
        doubleNumberFormatter.setGroupingUsed(false);
1✔
334
        doubleNumberFormatter.setMaximumFractionDigits(340);
1✔
335
        
336
        if (getDataLanguageEncoding() != null) {
1✔
UNCOV
337
            defaultCharSet = getDataLanguageEncoding(); 
×
338
        }
339
    }
1✔
340

341
    public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException{
342
        dbgLog.info("SAVFileReader: read() start");
1✔
343
        
344
        if (dataFile != null) {
1✔
UNCOV
345
            throw new IOException ("this plugin does not support external raw data files");
×
346
        }
347
        
348
        /* 
349
         * this "try" block is for catching unknown/unexpected exceptions 
350
         * thrown anywhere in the ingest code:
351
         */
352
        try {
353
         /* ingest happens here ... */ 
354
        
355
        // the following methods are now executed, in this order:
356
            
357
        // decodeHeader -- this method doesn't read any [meta]data and 
358
        //    doesn't initialize any values; its only purpose is to 
359
        //    make sure that the file is indeed an SPSS/SAV file. 
360
        // 
361
        // decodeRecordType1 -- there's always one RT1 record; it is 
362
        //    always 176 byte long. it contains the very basic metadata
363
        //    about the data file. most notably, the number of observations
364
        //    and the number of OBS (8 byte values) per observation.
365
        //
366
        // decodeRecordType2 -- there are multiple RT2 records. there's 
367
        //    one RT2 for every OBS (8 byte value); i.e. one per variable,
368
        //    or more per every String variable split into multiple OBS
369
        //    segments. this one is a 400 line method, that may benefit 
370
        //    from being split into smaller methods.
371
        //
372
        // decodeRecordType3and4 -- these sections come in pairs, each
373
        //    pair dedicated to one set of variable labels. 
374
        // decodeRecordType6,
375
        //
376
        // decodeRecordType7 -- this RT contains some extended 
377
        //    metadata for the data file. (including the information 
378
        //    about the extended variables, i.e. variables longer than
379
        //    255 bytes split into 255 byte fragments that are stored 
380
        //    in the data file as independent variables). 
381
        //
382
        // decodeRecordType999 -- this RT does not contain any data; 
383
        //    its sole function is to indicate that the metadata portion 
384
        //    of the data file is over and the data section follows. 
385
        // 
386
        // decodeRecordTypeData -- this method decodes the data section 
387
        //    of the file. Inside this method, 2 distinct methods are 
388
        //    called to process compressed or uncompressed data, depending
389
        //    on which method is used in this data file. 
390

391

392
        String methodCurrentlyExecuted = null; 
1✔
393

394
        try {
395
            methodCurrentlyExecuted = "decodeHeader";
1✔
396
            dbgLog.fine("***** SAVFileReader: executing method decodeHeader");
1✔
397
            decodeHeader(stream); 
1✔
398

399
            methodCurrentlyExecuted = "decodeRecordType1";
1✔
400
            dbgLog.fine("***** SAVFileReader: executing method decodeRecordType1");
1✔
401
            decodeRecordType1(stream); 
1✔
402

403
            methodCurrentlyExecuted = "decodeRecordType2";
1✔
404
            dbgLog.fine("***** SAVFileReader: executing method decodeRecordType1");
1✔
405
            decodeRecordType2(stream); 
1✔
406

407
            methodCurrentlyExecuted = "decodeRecordType3and4"; 
1✔
408
            dbgLog.fine("***** SAVFileReader: executing method decodeRecordType3and4");
1✔
409
            decodeRecordType3and4(stream); 
1✔
410

411
            methodCurrentlyExecuted = "decodeRecordType6";
1✔
412
            dbgLog.fine("***** SAVFileReader: executing method decodeRecordType6");
1✔
413
            decodeRecordType6(stream); 
1✔
414

415
            methodCurrentlyExecuted = "decodeRecordType7";
1✔
416
            dbgLog.fine("***** SAVFileReader: executing method decodeRecordType7");
1✔
417
            decodeRecordType7(stream);
1✔
418

419
            methodCurrentlyExecuted = "decodeRecordType999"; 
1✔
420
            dbgLog.fine("***** SAVFileReader: executing method decodeRecordType999");
1✔
421
            decodeRecordType999(stream);
1✔
422

423
            methodCurrentlyExecuted = "decodeRecordTypeData";
1✔
424
            dbgLog.fine("***** SAVFileReader: executing method decodeRecordTypeData");
1✔
425
            decodeRecordTypeData(stream, storeWithVariableHeader); 
1✔
426

427
                
428
        } catch (IllegalArgumentException e) {
×
429
            //Throwable cause = e.getCause();
430
            dbgLog.fine("***** SAVFileReader: ATTENTION: IllegalArgumentException thrown while executing "+methodCurrentlyExecuted);
×
431
            e.printStackTrace();
×
432
            throw new IOException ( "IllegalArgumentException in method "+methodCurrentlyExecuted+": "+e.getMessage() ); 
×
433
        } catch (IOException e) {
×
UNCOV
434
            dbgLog.fine("***** SAVFileReader: ATTENTION: IOException thrown while executing "+methodCurrentlyExecuted);
×
UNCOV
435
            e.printStackTrace();
×
UNCOV
436
            throw new IOException ( "IO Exception in method "+methodCurrentlyExecuted+": "+e.getMessage() ); 
×
437
        } 
1✔
438
        
439
        /* 
440
         * Final variable type assignments;
441
         * TODO: (maybe?) 
442
         * Instead of doing it here, perhaps all the type assignments need to 
443
         * be done on DataVariable objects directly;  without relying on 
444
         * maps and lists here... -- L.A. 4.0 beta (?)
445
         */
446

447
        
448
        for (int indx = 0; indx < variableTypelList.size(); indx++) {
1✔
449
            String varName = dataTable.getDataVariables().get(indx).getName(); 
1✔
450
            int simpleType = 0;
1✔
451
            if (variableTypelList.get(indx) != null) {
1✔
452
                simpleType = variableTypelList.get(indx).intValue();
1✔
453
            }
454

455
            if (simpleType <= 0) {
1✔
456
                // We need to make one last type adjustment:
457
                // Dates and Times will be stored as character values in the 
458
                // dataverse tab files; even though they are not typed as 
459
                // strings at this point:
460
                // TODO: 
461
                // Make sure the date/time format is properly preserved!
462
                // (see the setFormatCategory below... but double-check!)
463
                // -- L.A. 4.0 alpha
464
                String variableFormatType = variableFormatTypeList[indx];
1✔
465
                if (variableFormatType != null) {
1✔
466
                    if (variableFormatType.equals("time")
1✔
467
                        || variableFormatType.equals("date")) {
1✔
UNCOV
468
                        simpleType = 1; 
×
469
                    
470
                        String formatCategory = formatCategoryTable.get(varName);
×
471

472
                        if (formatCategory != null) {
×
473
                            if (dateFormatList[indx] != null) {
×
474
                                dbgLog.fine("setting format category to "+formatCategory);
×
UNCOV
475
                                dataTable.getDataVariables().get(indx).setFormatCategory(formatCategory);
×
UNCOV
476
                                dbgLog.fine("setting formatschemaname to "+dateFormatList[indx]);
×
UNCOV
477
                                dataTable.getDataVariables().get(indx).setFormat(dateFormatList[indx]);
×
478
                            }
479
                        }
480
                    } else if (variableFormatType.equals("other")) {
1✔
481
                        dbgLog.fine("Variable of format type \"other\"; type adjustment may be needed");
1✔
482
                        dbgLog.fine("SPSS print format: "+printFormatTable.get(dataTable.getDataVariables().get(indx).getName()));
1✔
483
                        
484
                        if (printFormatTable.get(dataTable.getDataVariables().get(indx).getName()).equals("WKDAY")
1✔
485
                            || printFormatTable.get(dataTable.getDataVariables().get(indx).getName()).equals("MONTH")) {
1✔
486
                            // week day or month; 
487
                            // These are not treated as time/date values (meaning, we 
488
                            // don't define time/date formats for them; there's likely 
489
                            // no valid ISO time/date format for just a month or a day 
490
                            // of week). However, the
491
                            // values will be stored in the TAB files as strings, 
492
                            // and not as numerics - as they were stored in the 
493
                            // SAV file. So we need to adjust the type here.
494
                            // -- L.A. 
495
                            
UNCOV
496
                            simpleType = 1;
×
497
                        }
498
                    }
499
                }
500
            }
501
            
502
            // OK, we can now assign the types: 
503
            
504
            if (simpleType > 0) {
1✔
505
                // String: 
506
                dataTable.getDataVariables().get(indx).setTypeCharacter();
1✔
507
                dataTable.getDataVariables().get(indx).setIntervalDiscrete();
1✔
508
            } else {
509
                // Numeric: 
510
                dataTable.getDataVariables().get(indx).setTypeNumeric();
1✔
511
                // discrete or continuous?
512
                // "decimal variables" become dataverse data variables of interval type "continuous":
513
        
514
                if (decimalVariableSet.contains(indx)) {
1✔
UNCOV
515
                    dataTable.getDataVariables().get(indx).setIntervalContinuous();
×
516
                } else {
517
                    dataTable.getDataVariables().get(indx).setIntervalDiscrete();
1✔
518
                }
519
                
520
            }
521
            
522
            // TODO: take care of the SPSS "shortToLongVariableNameTable"
523
            // mapping before returning the ingested data object. -- 4.0 alpha
524
            // (done, below - but verify!)
525
            
526
            if (shortToLongVariableNameTable.containsKey(varName)) {
1✔
527
                String longName = shortToLongVariableNameTable.get(varName); 
1✔
528
                if (longName != null && !longName.equals("")) {
1✔
529
                    dataTable.getDataVariables().get(indx).setName(longName);
1✔
530
                }
531
            }
532
            
533
        }        
534
        
535
        ingesteddata.setDataTable(dataTable);
1✔
536
        } catch (Exception ex) {
×
537
            dbgLog.fine("***** SAVFileReader: ATTENTION: unknown exception thrown.");
×
538
            ex.printStackTrace();
×
UNCOV
539
            String failureMessage = "Unknown exception in SPSS/SAV reader";
×
540
            if (ex.getMessage() != null) {
×
UNCOV
541
                failureMessage = failureMessage.concat(": "+ex.getMessage());
×
542
            } else {
UNCOV
543
                failureMessage = failureMessage.concat("; no further information is available.");
×
544
            }
UNCOV
545
            throw new IOException (failureMessage);    
×
546
        }
1✔
547
        dbgLog.info("SAVFileReader: read() end");
1✔
548
        return ingesteddata;
1✔
549
    }
550
    
551
    void decodeHeader(BufferedInputStream stream) throws IOException {
552
        dbgLog.fine("decodeHeader(): start");
1✔
553
        
554
        if (stream ==null){
1✔
UNCOV
555
            throw new IllegalArgumentException("stream == null!");
×
556
        }
557
        // the length of the magic number is 4 (1-byte character * 4)
558
        // its value is expected to be $FL2
559

560
        byte[] b = new byte[SAV_MAGIC_NUMBER_LENGTH];
1✔
561
        
562
        try {
563
            if (stream.markSupported()){
1✔
564
                stream.mark(100);
1✔
565
            }
566
            int nbytes = stream.read(b, 0, SAV_MAGIC_NUMBER_LENGTH);
1✔
567

568
            if (nbytes == 0){
1✔
569
                throw new IOException();
×
570
            }
571

UNCOV
572
        } catch (IOException ex){
×
573
            //ex.printStackTrace();
UNCOV
574
            throw ex; 
×
575
        }
1✔
576

577
        //printHexDump(b, "hex dump of the byte-array");
578

579
        String hdr4sav = new String(b);
1✔
580
        dbgLog.fine("from string=" + hdr4sav);
1✔
581

582
        if (hdr4sav.equals(SAV_FILE_SIGNATURE)) {
1✔
583
            dbgLog.fine("this file is spss-sav type");
1✔
584
            // initialize version-specific parameter
585
            init();
1✔
586
            
587
            dataTable.setOriginalFileFormat(MIME_TYPE[0]);
1✔
588
            
589
            dataTable.setUnf("UNF:6:");
1✔
590

591
            
592
        } else {
UNCOV
593
            dbgLog.fine("this file is NOT spss-sav type");
×
594

UNCOV
595
            throw new IllegalArgumentException("given file is not spss-sav type");
×
596
        }
597

598
        dbgLog.fine("***** decodeHeader(): end *****");
1✔
599

600
    }
1✔
601

602

603
    void decodeRecordType1(BufferedInputStream stream) throws IOException {
604
        dbgLog.fine("***** decodeRecordType1(): start *****");
1✔
605

606
        if (stream ==null){
1✔
UNCOV
607
            throw new IllegalArgumentException("stream == null!");
×
608
        }
609
        // how to read each recordType
610
        // 1. set-up the following objects before reading bytes
611
        // a. the working byte array
612
        // b. the storage object
613
        // the length of this field: 172bytes = 60 + 4 + 12 + 4 + 8 + 84
614
        // this field consists of 6 distinct blocks
615
        
616
        byte[] recordType1 = new byte[LENGTH_RECORDTYPE1];
1✔
617
        // int caseWeightVariableOBSIndex = 0; 
618
        
619
        try {
620
            int nbytes = stream.read(recordType1, 0, LENGTH_RECORDTYPE1);
1✔
621
            
622
            
623
            //printHexDump(recordType1, "recordType1");
624
            
625
            if (nbytes == 0){
1✔
UNCOV
626
                throw new IOException("reading recordType1: no byte was read");
×
627
            }
628
            
629
            // 1.1 60 byte-String that tells the platform/version of SPSS that
630
            // wrote this file
631
            
632
            int offset_start = 0;
1✔
633
            int offset_end = LENGTH_SPSS_PRODUCT_INFO; // 60 bytes
1✔
634
            
635
            String productInfo = new String(Arrays.copyOfRange(recordType1, offset_start,
1✔
636
                offset_end),"US-ASCII");
637
                
638
            dbgLog.fine("productInfo:\n"+productInfo+"\n");
1✔
639
            dataTable.setOriginalFormatVersion(productInfo);
1✔
640

641
            
642
            // try to parse out the SPSS version that created this data
643
            // file: 
644
            
645
            String spssVersionTag = null; 
1✔
646
            
647
            String regexpVersionNumber = ".*Release ([0-9]*)";
1✔
648
            Pattern versionTagPattern = Pattern.compile(regexpVersionNumber);
1✔
649
            Matcher matcher = versionTagPattern.matcher(productInfo);
1✔
650
            if ( matcher.find() ) {
1✔
UNCOV
651
                spssVersionTag = matcher.group(1); 
×
UNCOV
652
                dbgLog.fine("SPSS Version Number: "+spssVersionTag); 
×
653
            }
654
            
655
            // TODO: 
656
            // try a more elaborate regex (like the one for the "new-style" 
657
            // productInfo line, below), to select the version number, the 
658
            // minor version number and the platform (windows vs. mac) separately. 
659
            // would be cleaner to save just that, rather than the entire 
660
            // productInfo tag. 
661
            // -- L.A. 4.0 beta
662
            
663
            if (spssVersionTag == null || spssVersionTag.equals("")) {
1✔
664
                // Later versions of SPSS have different formatting of the
665
                // productInfo line:
666
                regexpVersionNumber = ".* IBM SPSS STATISTICS.* ([^ ]*) ([0-9][0-9]*)([^ ]*)";
1✔
667
                versionTagPattern = Pattern.compile(regexpVersionNumber);
1✔
668
                matcher = versionTagPattern.matcher(productInfo);
1✔
669
                if (matcher.find()) {
1✔
670
                    String spssPlatformTag = matcher.group(1);
1✔
671
                    spssVersionTag = matcher.group(2);
1✔
672
                    String spssVersionTagMinor = matcher.group(3);
1✔
673
                    
674
                    dbgLog.fine("SPSS Version Number (new style): " + spssVersionTag);
1✔
675
                    dbgLog.fine("SPSS Version/Platform Identification (new style:) " +
1✔
676
                            spssPlatformTag + " " + spssVersionTag + spssVersionTagMinor);
677
                    dataTable.setOriginalFormatVersion(spssVersionTag + 
1✔
678
                            spssVersionTagMinor + " " + 
679
                            spssPlatformTag);
680
                    
681
                }
682
            }
683
            
684
            if (spssVersionTag != null && !spssVersionTag.equals("")) {
1✔
685
                spssVersionNumber = Integer.valueOf(spssVersionTag).intValue();
1✔
686
                
687

688
                /*
689
                 *  Starting with SPSS version 16, the default encoding is 
690
                 *  UTF-8. 
691
                 *  But we are only going to use it if the user did not explicitly
692
                 *  specify the encoding on the addfiles page. Then we'd want 
693
                 *  to stick with whatever they entered. 
694
                 *  (also, it appears that (starting with the same version 16?)
695
                 *  it is actually possible to define the locale/character set
696
                 *  in the file - section 7, sub-type 20; TODO: decide which 
697
                 *  one takes precedence, if we have the encoding defined both
698
                 *  in the file and through the UI. -- L.A. 4.0 beta)
699
                 */
700
                if (spssVersionNumber > 15) {
1✔
701
                    if (getDataLanguageEncoding() == null) {
1✔
702
                        //defaultCharSet = "windows-1252"; // temporary! -- L.A. "UTF-8";
703
                        defaultCharSet = "UTF-8"; 
1✔
704
                    }
705
                }
706
            }
707
             
708
            // TODO: 
709
            // decide if we want to save the [determined/guessed] character set
710
            // somewhere in the dataset object. 
711
            // this may be relevant in cases when accented/non-latin characters
712
            // get ingested incorrectly; 
713
            // -- L.A. 4.0 beta
714
            
715
            // 1.2) 4-byte file-layout-code (byte-order)
716
            
717
            offset_start = offset_end;
1✔
718
            offset_end += LENGTH_FILE_LAYOUT_CODE; // 4 byte
1✔
719
            
720
            ByteBuffer bb_fileLayout_code  = ByteBuffer.wrap(
1✔
721
                    recordType1, offset_start, LENGTH_FILE_LAYOUT_CODE);
722
            
723
            ByteBuffer byteOderTest = bb_fileLayout_code.duplicate();
1✔
724
            // interpret the 4 bytes as an int
725

726
            int int2test = byteOderTest.getInt();
1✔
727
            
728
            if (int2test == 2 || int2test == 3){
1✔
UNCOV
729
                dbgLog.fine("integer == "+int2test+": the byte-oder of the writer is the same "+
×
730
                "as the counterpart of Java: Big Endian");
731
            } else {
732
                // Because Java's byte-order is always big endian, 
733
                // this(!=2) means this sav file was  written on a little-endian machine
734
                // non-string, multi-bytes blocks must be byte-reversed
735

736
                bb_fileLayout_code.order(ByteOrder.LITTLE_ENDIAN);
1✔
737

738
                int2test = bb_fileLayout_code.getInt();
1✔
739

740
                if (int2test == 2 || int2test == 3){
1✔
741
                    dbgLog.fine("The sav file was saved on a little endian machine");
1✔
742
                    dbgLog.fine("Reveral of the bytes is necessary to decode "+
1✔
743
                            "multi-byte, non-string blocks");
744
                            
745
                    isLittleEndian = true;
1✔
746
                    
747
                } else {
UNCOV
748
                    throw new IOException("reading recordType1:unknown file layout code="+int2test);
×
749
                }
750
            }
751

752
            dbgLog.fine("Endian of this platform:"+ByteOrder.nativeOrder().toString());
1✔
753

754
            // 1.3 4-byte Number_Of_OBS_Units_Per_Case 
755
            // (= how many RT2 records => how many variables)
756
            
757
            offset_start = offset_end;
1✔
758
            offset_end += LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE; // 4 byte
1✔
759
            
760
            ByteBuffer bb_OBS_units_per_case  = ByteBuffer.wrap( 
1✔
761
                    recordType1, offset_start,LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE);
762
            
763
            if (isLittleEndian){
1✔
764
                bb_OBS_units_per_case.order(ByteOrder.LITTLE_ENDIAN);
1✔
765
            }
766
            
767
            
768
            OBSUnitsPerCase = bb_OBS_units_per_case.getInt();
1✔
769
            
770
            dbgLog.fine("RT1: OBSUnitsPerCase="+OBSUnitsPerCase);
1✔
771

772
            // 1.4 4-byte Compression_Switch
773
            
774
            offset_start = offset_end;
1✔
775
            offset_end += LENGTH_COMPRESSION_SWITCH; // 4 byte
1✔
776
            
777
            ByteBuffer bb_compression_switch  = ByteBuffer.wrap(recordType1, 
1✔
778
                    offset_start, LENGTH_COMPRESSION_SWITCH);
779
            
780
            if (isLittleEndian){
1✔
781
                bb_compression_switch.order(ByteOrder.LITTLE_ENDIAN);
1✔
782
            }
783
            
784
            int compression_switch = bb_compression_switch.getInt();
1✔
785
            if ( compression_switch == 0){
1✔
786
                // data section is not compressed
UNCOV
787
                isDataSectionCompressed = false;
×
UNCOV
788
                dbgLog.fine("data section is not compressed");
×
789
            } else {
790
                dbgLog.fine("data section is compressed:"+compression_switch);
1✔
791
            }
792
            
793
            // 1.5 4-byte Case-Weight Variable Index
794
            // warning: this variable index starts from 1, not 0
795
            
796
            offset_start = offset_end;
1✔
797
            offset_end += LENGTH_CASE_WEIGHT_VARIABLE_INDEX; // 4 byte
1✔
798
            
799
            ByteBuffer bb_Case_Weight_Variable_Index = ByteBuffer.wrap(recordType1, 
1✔
800
                    offset_start, LENGTH_CASE_WEIGHT_VARIABLE_INDEX);
801
            
802
            if (isLittleEndian){
1✔
803
                bb_Case_Weight_Variable_Index.order(ByteOrder.LITTLE_ENDIAN);
1✔
804
            }
805
            
806
            caseWeightVariableOBSIndex = bb_Case_Weight_Variable_Index.getInt();
1✔
807
            
808
            /// caseWeightVariableOBSIndex will be used later on to locate 
809
            /// the weight variable; so we'll be able to mark the corresponding
810
            /// variables properly. 
811
            // TODO: make sure case weight variables are properly handled! 
812
            // -- L.A. 4.0 beta
813
            ///smd.getFileInformation().put("caseWeightVariableOBSIndex", caseWeightVariableOBSIndex);
814

815
            // 1.6 4-byte Number of Cases
816

817
            offset_start = offset_end;
1✔
818
            offset_end += LENGTH_NUMBER_OF_CASES; // 4 byte
1✔
819
            
820
            ByteBuffer bb_Number_Of_Cases = ByteBuffer.wrap(recordType1, 
1✔
821
                    offset_start, LENGTH_NUMBER_OF_CASES);
822
            
823
            if (isLittleEndian){
1✔
824
                bb_Number_Of_Cases.order(ByteOrder.LITTLE_ENDIAN);
1✔
825
            }
826
            
827
            int numberOfCases = bb_Number_Of_Cases.getInt();
1✔
828
            
829
            if ( numberOfCases < 0){
1✔
830
                // -1 if numberOfCases is unknown
UNCOV
831
                throw new RuntimeException("number of cases is not recorded in the header");
×
832
            } else {
833
                dbgLog.fine("RT1: number of cases is recorded= "+numberOfCases);
1✔
834
                dataTable.setCaseQuantity(new Long(numberOfCases));
1✔
835
                ///caseQnty = numberOfCases;
836
                ///smd.getFileInformation().put("caseQnty", numberOfCases);
837
            }
838

839
            // 1.7 8-byte compression-bias [not long but double]
840
            
841
            offset_start = offset_end;
1✔
842
            offset_end += LENGTH_COMPRESSION_BIAS; // 8 byte
1✔
843
            
844
            ByteBuffer bb_compression_bias = ByteBuffer.wrap( 
1✔
845
                    Arrays.copyOfRange(recordType1, offset_start,
1✔
846
                offset_end));
847

848
            if (isLittleEndian){
1✔
849
               bb_compression_bias.order(ByteOrder.LITTLE_ENDIAN);
1✔
850
            }
851

852
            Double compressionBias = bb_compression_bias.getDouble();
1✔
853
            
854
            // TODO: 
855
            // check if this "compression bias" is being used anywhere? 
856
            // doesn't seem to be!
857
            // -- 4.0 alpha
858
            if ( compressionBias == 100d){
1✔
859
                // 100 is expected
860
                dbgLog.fine("compressionBias is 100 as expected");
1✔
861
                ///smd.getFileInformation().put("compressionBias", 100);
862
            } else {
UNCOV
863
                dbgLog.fine("compression bias is not 100: "+ compressionBias);
×
864
                ///smd.getFileInformation().put("compressionBias", compressionBias);
865
            }
866
            
867
            
868
            // 1.8 84-byte File Creation Information (date/time: dd MM yyhh:mm:ss +
869
            // 64-byte label)
870
            
871
            offset_start    = offset_end;
1✔
872
            offset_end += LENGTH_FILE_CREATION_INFO; // 84 bytes
1✔
873
            
874
            String fileCreationInfo = getNullStrippedString(new String(Arrays.copyOfRange(recordType1, offset_start,
1✔
875
                offset_end),"US-ASCII"));
876
                
877
            dbgLog.fine("fileCreationInfo:\n"+fileCreationInfo+"\n");
1✔
878
            
879
            String fileCreationDate = fileCreationInfo.substring(0,length_file_creation_date);
1✔
880
            int dateEnd = length_file_creation_date+length_file_creation_time;
1✔
881
            String fileCreationTime = fileCreationInfo.substring(length_file_creation_date,
1✔
882
                    (dateEnd));
883
            String fileCreationNote = fileCreationInfo.substring(dateEnd,length_file_creation_label);
1✔
884

885

886
            dbgLog.fine("fileDate="+ fileCreationDate);
1✔
887
            dbgLog.fine("fileTime="+ fileCreationTime);
1✔
888
            dbgLog.fine("fileNote"+ fileCreationNote);
1✔
889
            
890
            
UNCOV
891
        } catch (IOException ex) {
×
UNCOV
892
            throw ex; 
×
893
        }
1✔
894
        
895
        dbgLog.fine("decodeRecordType1(): end");
1✔
896
    }
1✔
897
    
898
    
899
    void decodeRecordType2(BufferedInputStream stream) throws IOException {
900
        dbgLog.fine("decodeRecordType2(): start");
1✔
901
        if (stream ==null){
1✔
UNCOV
902
            throw new IllegalArgumentException("stream == null!");
×
903
        }
904

905
        Map<String, String> printFormatNameTable = new LinkedHashMap<String, String>(); 
1✔
906
        Map<String, String> variableLabelMap = new LinkedHashMap<String, String>();
1✔
907
        Map<String, List<String>> missingValueTable = new LinkedHashMap<String, List<String>>();
1✔
908
        List<Integer> printFormatList = new ArrayList<Integer>();
1✔
909

910
        String caseWeightVariableName = null;
1✔
911
        int caseWeightVariableIndex = 0;
1✔
912

913

914
        boolean lastVariableIsExtendable = false;
1✔
915
        boolean extendedVariableMode = false;
1✔
916
        boolean obs255 = false;
1✔
917

918
        String lastVariableName = null;
1✔
919
        String lastExtendedVariable = null;
1✔
920

921

922
        // this field repeats as many as the number of variables in
923
        // this sav file
924

925
        // (note that the above statement is not technically correct, this
926
        //  record repeats not just for every variable in the file, but for
927
        //  every OBS (8 byte unit); i.e., if a string is split into multiple
928
        //  OBS units, each one will have its own RT2 record -- L.A.).
929

930
        // Each field consists of a fixed (32-byte) segment and
931
        // then a few variable segments:
932
        // if the variable has a label (3rd INT4 set to 1), then there's 4 more
933
        // bytes specifying the length of the label, and then that many bytes
934
        // holding the label itself (no more than 256).
935
        // Then if there are optional missing value units (4th INT4 set to 1)
936
        // there will be 3 more OBS units attached = 24 extra bytes.
937

938
        int variableCounter = 0;
1✔
939
        int obsSeqNumber = 0;
1✔
940

941
        int j;
942

943
        dbgLog.fine("RT2: Reading "+OBSUnitsPerCase+" OBS units.");
1✔
944

945
        for (j=0; j<OBSUnitsPerCase; j++){
1✔
946

947
            dbgLog.fine("RT2: "+j+"-th RT2 unit is being decoded.");
1✔
948
            // 2.0: read the fixed[=non-optional] 32-byte segment
949
            byte[] recordType2Fixed = new byte[LENGTH_RECORDTYPE2_FIXED];
1✔
950

951
            try {
952
                int nbytes = stream.read(recordType2Fixed, 0, LENGTH_RECORDTYPE2_FIXED);
1✔
953

954

955
                //printHexDump(recordType2Fixed, "recordType2 part 1");
956

957
                if (nbytes == 0){
1✔
UNCOV
958
                    throw new IOException("reading recordType2: no bytes read!");
×
959
                }
960

961
                int offset = 0;
1✔
962

963
                // 2.1: create int-view of the bytebuffer for the first 16-byte segment
964
                int rt2_1st_4_units = 4;
1✔
965
                ByteBuffer[] bb_record_type2_fixed_part1 = new ByteBuffer[rt2_1st_4_units];
1✔
966
                int[] recordType2FixedPart1 = new int[rt2_1st_4_units];
1✔
967
                for (int i= 0; i < rt2_1st_4_units;i++ ){
1✔
968

969
                    bb_record_type2_fixed_part1[i] =
1✔
970
                    ByteBuffer.wrap(recordType2Fixed, offset, LENGTH_SAV_INT_BLOCK);
1✔
971

972
                    offset +=LENGTH_SAV_INT_BLOCK;
1✔
973
                    if (isLittleEndian){
1✔
974
                        bb_record_type2_fixed_part1[i].order(ByteOrder.LITTLE_ENDIAN);
1✔
975
                    }
976
                    recordType2FixedPart1[i] = bb_record_type2_fixed_part1[i].getInt();
1✔
977
                }
978

979

980
                ///dbgLog.fine("recordType2FixedPart="+
981
                ///        ReflectionToStringBuilder.toString(recordType2FixedPart1, ToStringStyle.MULTI_LINE_STYLE));
982

983

984
                // 1st ([0]) element must be 2 otherwise no longer Record Type 2
985
                if (recordType2FixedPart1[0] != 2){
1✔
UNCOV
986
                    dbgLog.warning(j+"-th RT header value is no longet RT2! "+recordType2FixedPart1[0]);
×
UNCOV
987
                    break;
×
988
                }
989
                dbgLog.fine("variable type[must be 2]="+recordType2FixedPart1[0]);
1✔
990

991

992
                // 2.3 variable name: 8 byte(space[x20]-padded)
993
                // This field is located at the very end of the 32 byte
994
                // fixed-size RT2 header (bytes 24-31).
995
                // We are processing it now, so that
996
                // we can make the decision on whether this variable is part
997
                // of a compound variable:
998

999
                String RawVariableName = getNullStrippedString(new String(Arrays.copyOfRange(recordType2Fixed, 24, (24+LENGTH_VARIABLE_NAME)),defaultCharSet));
1✔
1000
                //offset +=LENGTH_VARIABLE_NAME;
1001
                String variableName = null;
1✔
1002
                if (RawVariableName.indexOf(' ') >= 0){
1✔
1003
                    variableName = RawVariableName.substring(0, RawVariableName.indexOf(' '));
1✔
1004
                } else {
UNCOV
1005
                    variableName = RawVariableName;
×
1006
                }
1007

1008

1009
                // 2nd ([1]) element: numeric variable = 0 :for string variable
1010
                // this block indicates its datum-length, i.e, >0 ;
1011
                // if -1, this RT2 unit is a non-1st RT2 unit for a string variable
1012
                // whose value is longer than 8 characters.
1013

1014
                boolean isNumericVariable = false;
1✔
1015

1016
                dbgLog.fine("variable type(0: numeric; > 0: String;-1 continue )="+recordType2FixedPart1[1]);
1✔
1017

1018
                //OBSwiseTypelList.add(recordType2FixedPart1[1]);
1019

1020
                int HowManyRt2Units=1;
1✔
1021

1022

1023
                if (recordType2FixedPart1[1] == -1) {
1✔
1024
                    dbgLog.fine("this RT2 is an 8 bit continuation chunk of an earlier string variable");
×
1025
                    if ( obs255 ) {
×
UNCOV
1026
                        if ( obsSeqNumber < 30 ) {
×
1027
                            OBSwiseTypelList.add(recordType2FixedPart1[1]);
×
1028
                            obsSeqNumber++;
×
1029
                        } else {
UNCOV
1030
                            OBSwiseTypelList.add(-2);
×
UNCOV
1031
                            obs255 = false;
×
1032
                            obsSeqNumber = 0;
×
1033
                        }
1034
                    } else {
1035
                        OBSwiseTypelList.add(recordType2FixedPart1[1]);
×
1036
                    }
1037

UNCOV
1038
                    obsNonVariableBlockSet.add(j);
×
UNCOV
1039
                    continue;
×
1040
                } else if (recordType2FixedPart1[1] == 0){
1✔
1041
                    // This is a numeric variable
1042
                    extendedVariableMode = false;
1✔
1043
                    // And as such, it cannot be an extension of a
1044
                    // previous, long string variable.
1045
                    OBSwiseTypelList.add(recordType2FixedPart1[1]);
1✔
1046
                    variableCounter++;
1✔
1047
                    isNumericVariable = true;
1✔
1048
                    variableTypelList.add(recordType2FixedPart1[1]);
1✔
1049
                } else if (recordType2FixedPart1[1] > 0){
1✔
1050

1051
                    // This looks like a regular string variable. However,
1052
                    // it may still be a part of a compound variable
1053
                    // (a String > 255 bytes that was split into 255 byte
1054
                    // chunks, stored as individual String variables).
1055

1056
                    if (recordType2FixedPart1[1] == 255){
1✔
UNCOV
1057
                        obs255 = true;
×
1058
                    }
1059

1060
                    if ( lastVariableIsExtendable ) {
1✔
UNCOV
1061
                        String varNameBase = null;
×
1062
                        if ( lastVariableName.length() > 5 ) {
×
UNCOV
1063
                            varNameBase = lastVariableName.substring (0, 5);
×
1064
                        } else {
1065
                            varNameBase = lastVariableName;
×
1066
                        }
1067

1068
                        if ( extendedVariableMode ) {
×
UNCOV
1069
                            if ( variableNameIsAnIncrement ( varNameBase, lastExtendedVariable, variableName ) ) {
×
UNCOV
1070
                                OBSwiseTypelList.add(-1);
×
UNCOV
1071
                                lastExtendedVariable = variableName;
×
1072
                                // OK, we stay in the "extended variable" mode;
1073
                                // but we can't move on to the next OBS (hence the commented out
1074
                                // "continue" below:
1075
                                //continue;
1076
                                // see the next comment below for the explanation.
1077
                                //
1078
                                // Should we also set "extendable" flag to false at this point
1079
                                // if it's shorter than 255 bytes, i.e. the last extended chunk?
1080
                            } else {
1081
                                extendedVariableMode = false;
×
1082
                            }
1083
                        } else {
1084
                            if ( variableNameIsAnIncrement ( varNameBase, variableName ) ) {
×
1085
                                OBSwiseTypelList.add(-1);
×
UNCOV
1086
                                extendedVariableMode = true;
×
UNCOV
1087
                                dbgLog.fine("RT2: in extended variable mode; variable "+variableName);
×
UNCOV
1088
                                lastExtendedVariable = variableName;
×
1089
                                // Before we move on to the next OBS unit, we need to check
1090
                                // if this current extended variable has its own label specified;
1091
                                // If so, we need to determine its length, then read and skip
1092
                                // that many bytes.
1093
                                // Hence the commented out "continue" below:
1094
                                //continue;
1095
                            }
1096
                        }
1097
                    }
1098

1099
                    if ( !extendedVariableMode) {
1✔
1100
                        // OK, this is a "real"
1101
                        // string variable, and not a continuation chunk of a compound
1102
                        // string.
1103

1104
                        OBSwiseTypelList.add(recordType2FixedPart1[1]);
1✔
1105
                        variableCounter++;
1✔
1106

1107
                        if (recordType2FixedPart1[1] == 255){
1✔
1108
                            // This variable is 255 bytes long, i.e. this is
1109
                            // either the single "atomic" variable of the
1110
                            // max allowed size, or it's a 255 byte segment
1111
                            // of a compound variable. So we will check
1112
                            // the next variable and see if it is the continuation
1113
                            // of this one.
1114

UNCOV
1115
                            lastVariableIsExtendable = true;
×
1116
                        } else {
1117
                            lastVariableIsExtendable = false;
1✔
1118
                        }
1119

1120
                        if (recordType2FixedPart1[1] % LENGTH_SAV_OBS_BLOCK == 0){
1✔
UNCOV
1121
                            HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK;
×
1122
                        } else {
1123
                            HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK +1;
1✔
1124
                        }
1125
                        variableTypelList.add(recordType2FixedPart1[1]);
1✔
1126
                    }
1127
                }
1128

1129
                if ( !extendedVariableMode ) {
1✔
1130
                    // Again, we only want to do the following steps for the "real"
1131
                    // variables, not the chunks of split mega-variables:
1132

1133
                    dbgLog.fine("RT2: HowManyRt2Units for this variable="+HowManyRt2Units);
1✔
1134

1135
                    lastVariableName = variableName;
1✔
1136

1137
                    // caseWeightVariableOBSIndex starts from 1: 0 is used for does-not-exist cases
1138
                    if (j == (caseWeightVariableOBSIndex - 1)){
1✔
UNCOV
1139
                        caseWeightVariableName = variableName;
×
1140
                        // TODO: do we need this "index"? -- 4.0 alpha
UNCOV
1141
                        caseWeightVariableIndex = variableCounter;
×
1142

1143
                        ///smd.setCaseWeightVariableName(caseWeightVariableName);
1144
                        ///smd.getFileInformation().put("caseWeightVariableIndex", caseWeightVariableIndex);
1145
                    }
1146

1147
                    OBSIndexToVariableName.put(j, variableName);
1✔
1148

1149
                    //dbgLog.fine("\nvariable name="+variableName+"<-");
1150
                    dbgLog.fine("RT2: "+j+"-th variable name="+variableName+"<-");
1✔
1151
                    dbgLog.fine("RT2: raw variable: "+RawVariableName);
1✔
1152

1153
                    variableNameList.add(variableName);
1✔
1154
                }
1155

1156

1157

1158
                // 3rd ([2]) element: = 1 variable-label block follows; 0 = no label
1159
                //
1160
                dbgLog.fine("RT: variable label follows?(1:yes; 0: no)="+recordType2FixedPart1[2]);
1✔
1161
                boolean hasVariableLabel = recordType2FixedPart1[2] == 1 ? true : false;
1✔
1162
                if ((recordType2FixedPart1[2] != 0) && (recordType2FixedPart1[2] != 1)) {
1✔
UNCOV
1163
                    throw new IOException("RT2: reading error: value is neither 0 or 1"+
×
1164
                            recordType2FixedPart1[2]);
1165
                }
1166

1167
                // 2.4 [optional]The length of a variable label followed: 4-byte int
1168
                // 3rd element of 2.1 indicates whether this field exists
1169
                // *** warning: The label block is padded to a multiple of the 4-byte
1170
                // NOT the raw integer value of this 4-byte block
1171

1172

1173
                if (hasVariableLabel){
1✔
1174
                    byte[] length_variable_label= new byte[4];
1✔
1175
                    int nbytes_2_4 = stream.read(length_variable_label);
1✔
1176
                    if (nbytes_2_4 == 0){
1✔
UNCOV
1177
                        throw new IOException("RT 2: error reading recordType2.4: no bytes read!");
×
1178
                    } else {
1179
                        dbgLog.fine("nbytes_2_4="+nbytes_2_4);
1✔
1180
                    }
1181
                    ByteBuffer bb_length_variable_label = ByteBuffer.wrap(
1✔
1182
                            length_variable_label, 0, LENGTH_VARIABLE_LABEL);
1183
                    if (isLittleEndian){
1✔
1184
                        bb_length_variable_label.order(ByteOrder.LITTLE_ENDIAN);
1✔
1185
                    }
1186
                    int rawVariableLabelLength = bb_length_variable_label.getInt();
1✔
1187

1188
                    dbgLog.fine("rawVariableLabelLength="+rawVariableLabelLength);
1✔
1189
                    int variableLabelLength = getSAVintAdjustedBlockLength(rawVariableLabelLength);
1✔
1190
                    dbgLog.fine("RT2: variableLabelLength="+variableLabelLength);
1✔
1191

1192
                    // 2.5 [optional]variable label whose length is found at 2.4
1193

1194
                    String variableLabel = "";
1✔
1195

1196
                    if (rawVariableLabelLength > 0) {
1✔
1197
                    byte[] variable_label = new byte[variableLabelLength];
1✔
1198
                    int nbytes_2_5 = stream.read(variable_label);
1✔
1199
                    if (nbytes_2_5 == 0){
1✔
UNCOV
1200
                            throw new IOException("RT 2: error reading recordType2.5: "
×
1201
                                    +variableLabelLength+" bytes requested, no bytes read!");
1202
                    } else {
1203
                        dbgLog.fine("nbytes_2_5="+nbytes_2_5);
1✔
1204
                    }
1205
                        variableLabel = getNullStrippedString(new String(Arrays.copyOfRange(variable_label,
1✔
1206
                                0, rawVariableLabelLength),defaultCharSet));
1207
                        dbgLog.fine("RT2: variableLabel="+variableLabel+"<-");
1✔
1208

1209
                        dbgLog.fine(variableName + " => " + variableLabel);
1✔
1210
                    } else {
1✔
UNCOV
1211
                        dbgLog.fine("RT2: defaulting to empty variable label.");
×
1212
                    }
1213
                    
1214
                    if (!extendedVariableMode) {
1✔
1215
                    // We only have any use for this label if it's a "real" variable.
1216
                    // Thinking about it, it doesn't make much sense for the "fake"
1217
                    // variables that are actually chunks of large strings to store
1218
                    // their own labels. But in some files they do. Then failing to read
1219
                    // the bytes would result in getting out of sync with the RT record
1220
                    // borders. So we always read the bytes, but only use them for
1221
                    // the real variable entries.
1222
                        /*String variableLabel = new String(Arrays.copyOfRange(variable_label,
1223
                                0, rawVariableLabelLength),"US-ASCII");*/
1224

1225
                        variableLabelMap.put(variableName, variableLabel);
1✔
1226
                    }
1227
                }
1228

1229
                if (extendedVariableMode) {
1✔
1230
                // there's nothing else left for us to do in this iteration of the loop.
1231
                // Once again, this was not a real variable, but a dummy variable entry
1232
                // created for a chunk of a string variable longer than 255 bytes --
1233
                // that's how SPSS stores them.
UNCOV
1234
                    continue;
×
1235
                }
1236

1237
                // 4th ([3]) element: Missing value type code
1238
                // 0[none], 1, 2, 3 [point-type],-2[range], -3 [range type+ point]
1239

1240
                dbgLog.fine("RT: missing value unit follows?(if 0, none)="+recordType2FixedPart1[3]);
1✔
1241
                boolean hasMissingValues =
1242
                        (validMissingValueCodeSet.contains(
1✔
1243
                                recordType2FixedPart1[3]) && (recordType2FixedPart1[3] !=0)) ?
1✔
1244
                        true : false;
1✔
1245

1246
                InvalidData invalidDataInfo = null;
1✔
1247

1248
                if (recordType2FixedPart1[3] !=0){
1✔
UNCOV
1249
                    invalidDataInfo = new InvalidData(recordType2FixedPart1[3]);
×
UNCOV
1250
                    dbgLog.fine("RT: missing value type="+invalidDataInfo.getType());
×
1251
                }
1252

1253
                // 2.2: print/write formats: 4-byte each = 8 bytes
1254

1255
                byte[] printFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset+
1✔
1256
                        LENGTH_PRINT_FORMAT_CODE);
1257
                dbgLog.fine("printFrmt="+new String (Hex.encodeHex(printFormt)));
1✔
1258

1259

1260
                offset +=LENGTH_PRINT_FORMAT_CODE;
1✔
1261
                int formatCode = isLittleEndian ? printFormt[2] : printFormt[1];
1✔
1262
                int formatWidth = isLittleEndian ? printFormt[1] : printFormt[2];
1✔
1263
                
1264
                // TODO: 
1265
                // What should we be doing with these "format decimal positions" 
1266
                // in 4.0? 
1267
                // -- L.A. 4.0 alpha
1268
                
1269
                int formatDecimalPointPosition = isLittleEndian ? printFormt[0] : printFormt[3];
1✔
1270
                dbgLog.fine("RT2: format code{5=F, 1=A[String]}="+formatCode);
1✔
1271

1272
                formatDecimalPointPositionList.add(formatDecimalPointPosition);
1✔
1273

1274

1275
                if (!SPSSConstants.FORMAT_CODE_TABLE_SAV.containsKey(formatCode)){
1✔
UNCOV
1276
                    throw new IOException("Unknown format code was found = "
×
1277
                            + formatCode);
1278
                } else{
1279
                    printFormatList.add(formatCode);
1✔
1280
                }
1281

1282
                byte[] writeFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset+
1✔
1283
                        LENGTH_WRITE_FORMAT_CODE);
1284

1285
                dbgLog.fine("RT2: writeFrmt="+new String (Hex.encodeHex(writeFormt)));
1✔
1286
                if (writeFormt[3] != 0x00){
1✔
UNCOV
1287
                    dbgLog.fine("byte-order(write format): reversal required");
×
1288
                }
1289

1290
                offset +=LENGTH_WRITE_FORMAT_CODE;
1✔
1291

1292
                if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)) {
1✔
1293
                    StringBuilder sb = new StringBuilder(
×
1294
                    SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode)+
×
1295
                            formatWidth);
1296
                    if (formatDecimalPointPosition > 0){
×
1297
                        sb.append("."+ formatDecimalPointPosition);
×
1298
                    }
UNCOV
1299
                    dbgLog.fine("formattable[i] = " + variableName + " -> " + sb.toString());
×
UNCOV
1300
                    printFormatNameTable.put(variableName, sb.toString());
×
1301

1302
                }
1303

1304
                printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode));
1✔
1305

1306

1307
                // 2.6 [optional] missing values:4-byte each if exists
1308
                // 4th element of 2.1 indicates the structure of this sub-field
1309

1310
                // Should we perhaps check for this for the "fake" variables too?
1311
                //
1312

1313
                if (hasMissingValues) {
1✔
UNCOV
1314
                    dbgLog.fine("RT2: decoding missing value: type="+recordType2FixedPart1[3]);
×
1315
                    int howManyMissingValueUnits = missingValueCodeUnits.get(recordType2FixedPart1[3]);
×
1316
                    //int howManyMissingValueUnits = recordType2FixedPart1[3] > 0 ? recordType2FixedPart1[3] :  0;
1317

1318
                    dbgLog.fine("RT2: howManyMissingValueUnits="+howManyMissingValueUnits);
×
1319

1320
                    byte[] missing_value_code_units = new byte[LENGTH_SAV_OBS_BLOCK*howManyMissingValueUnits];
×
1321
                    int nbytes_2_6 = stream.read(missing_value_code_units);
×
1322

1323
                    if (nbytes_2_6 == 0){
×
UNCOV
1324
                        throw new IOException("RT 2: reading recordType2.6: no byte was read");
×
1325
                    } else {
UNCOV
1326
                        dbgLog.fine("nbytes_2_6="+nbytes_2_6);
×
1327
                    }
1328

1329
                    //printHexDump(missing_value_code_units, "missing value");
1330

UNCOV
1331
                    if (isNumericVariable){
×
1332

UNCOV
1333
                        double[] missingValues = new double[howManyMissingValueUnits];
×
1334
                        //List<String> mvp = new ArrayList<String>();
UNCOV
1335
                        List<String> mv = new ArrayList<String>();
×
1336

1337
                        ByteBuffer[] bb_missig_value_code =
×
1338
                            new ByteBuffer[howManyMissingValueUnits];
1339

UNCOV
1340
                        int offset_start = 0;
×
1341

1342
                        for (int i= 0; i < howManyMissingValueUnits;i++ ){
×
1343

UNCOV
1344
                            bb_missig_value_code[i]  =
×
1345
                                    ByteBuffer.wrap(missing_value_code_units, offset_start,
×
1346
                                    LENGTH_SAV_OBS_BLOCK);
1347

UNCOV
1348
                            offset_start +=LENGTH_SAV_OBS_BLOCK;
×
UNCOV
1349
                            if (isLittleEndian){
×
1350
                                bb_missig_value_code[i].order(ByteOrder.LITTLE_ENDIAN);
×
1351
                            }
1352

1353
                            ByteBuffer temp = bb_missig_value_code[i].duplicate();
×
1354

1355

1356
                            missingValues[i] = bb_missig_value_code[i].getDouble();
×
1357
                            if (Double.toHexString(missingValues[i]).equals("-0x1.ffffffffffffep1023")){
×
1358
                                dbgLog.fine("1st value is LOWEST");
×
1359
                                mv.add(Double.toHexString(missingValues[i]));
×
UNCOV
1360
                            } else if (Double.valueOf(missingValues[i]).equals(Double.MAX_VALUE)){
×
1361
                                dbgLog.fine("2nd value is HIGHEST");
×
UNCOV
1362
                                mv.add(Double.toHexString(missingValues[i]));
×
1363
                            } else {
UNCOV
1364
                                mv.add(doubleNumberFormatter.format(missingValues[i]));
×
1365
                            }
1366
                            dbgLog.fine(i+"-th missing value="+Double.toHexString(missingValues[i]));
×
1367
                        }
1368

1369
                        dbgLog.fine("variableName="+variableName);
×
1370
                        if (recordType2FixedPart1[3] > 0) {
×
1371
                            // point cases only
1372
                            dbgLog.fine("mv(>0)="+mv);
×
1373
                            missingValueTable.put(variableName, mv);
×
UNCOV
1374
                            invalidDataInfo.setInvalidValues(mv);
×
1375
                        } else if (recordType2FixedPart1[3]== -2) {
×
1376
                            dbgLog.fine("mv(-2)="+mv);
×
1377
                            // range
1378
                            invalidDataInfo.setInvalidRange(mv);
×
1379
                        } else if (recordType2FixedPart1[3]== -3){
×
1380
                            // mixed case
1381
                            dbgLog.fine("mv(-3)="+mv);
×
UNCOV
1382
                            invalidDataInfo.setInvalidRange(mv.subList(0, 2));
×
UNCOV
1383
                            invalidDataInfo.setInvalidValues(mv.subList(2, 3));
×
1384
                            missingValueTable.put(variableName, mv.subList(2, 3));
×
1385
                        }
1386

1387
                        dbgLog.fine("missing value="+
×
1388
                                StringUtils.join(missingValueTable.get(variableName),"|"));
×
UNCOV
1389
                        dbgLog.fine("invalidDataInfo(Numeric):\n"+invalidDataInfo);
×
1390
                        invalidDataTable.put(variableName, invalidDataInfo);
×
1391
                    } else {
×
1392
                        // string variable case
1393
                        String[] missingValues = new String[howManyMissingValueUnits];
×
1394
                        List<String> mv = new ArrayList<String>();
×
UNCOV
1395
                        int offset_start = 0;
×
1396
                        int offset_end   = LENGTH_SAV_OBS_BLOCK;
×
1397
                        for (int i= 0; i < howManyMissingValueUnits;i++ ){
×
1398

1399
                            missingValues[i] =
×
UNCOV
1400
                                    StringUtils.stripEnd(new
×
1401
                            String(Arrays.copyOfRange(missing_value_code_units, offset_start, offset_end),defaultCharSet), " ");
×
1402
                            dbgLog.fine("missing value="+missingValues[i]+"<-");
×
1403

1404
                            offset_start = offset_end;
×
UNCOV
1405
                            offset_end +=LENGTH_SAV_OBS_BLOCK;
×
1406

1407
                            mv.add(missingValues[i]);
×
1408
                        }
1409
                        invalidDataInfo.setInvalidValues(mv);
×
1410
                        missingValueTable.put(variableName, mv);
×
1411
                        invalidDataTable.put(variableName, invalidDataInfo);
×
UNCOV
1412
                        dbgLog.fine("missing value(str)="+
×
UNCOV
1413
                                StringUtils.join(missingValueTable.get(variableName),"|"));
×
1414
                        dbgLog.fine("invalidDataInfo(String):\n"+invalidDataInfo);
×
1415

1416
                    } // string case
1417
                    dbgLog.fine("invalidDataTable:\n"+invalidDataTable);
×
1418
                } // if msv
1419

1420
            } catch (IOException ex){
×
1421
                //ex.printStackTrace();
UNCOV
1422
                throw ex;
×
UNCOV
1423
            } catch (Exception ex){
×
UNCOV
1424
                ex.printStackTrace();
×
1425
                // should we be throwing some exception here?
1426
            }
1✔
1427
        } // j-loop
1428

1429
        if (j != OBSUnitsPerCase ) {
1✔
UNCOV
1430
            dbgLog.fine("RT2: attention! didn't reach the end of the OBS list!");
×
UNCOV
1431
            throw new IOException("RT2: didn't reach the end of the OBS list!");
×
1432
        }
1433
        
1434
        dbgLog.fine("RT2 metadata-related exit-chores");
1✔
1435
        ///smd.getFileInformation().put("varQnty", variableCounter);
1436
        dataTable.setVarQuantity(new Long(variableCounter));
1✔
1437
        dbgLog.fine("RT2: varQnty=" + variableCounter);
1✔
1438

1439
        // 4.0 Initialize variables: 
1440
        List<DataVariable> variableList = new ArrayList<DataVariable>();
1✔
1441

1442
        for (int i = 0; i < variableCounter; i++) {
1✔
1443
            DataVariable dv = new DataVariable(i, dataTable);
1✔
1444
            String varName = variableNameList.get(i);
1✔
1445
            dbgLog.fine("name: "+varName);
1✔
1446
            dv.setName(varName);
1✔
1447
            String varLabel = variableLabelMap.get(varName);
1✔
1448
            if (varLabel != null && varLabel.length() > 255) {
1✔
1449
                // TODO: 
1450
                // variable labels will be changed into type 'TEXT' in the 
1451
                // database - this will eliminate the 255 char. limit. 
1452
                // -- L.A. 4.0 beta11
UNCOV
1453
                dbgLog.fine("Have to truncate label: "+varLabel);
×
UNCOV
1454
                varLabel = varLabel.substring(0, 255);
×
1455
            }
1456
            dbgLog.fine("label: "+varLabel);
1✔
1457
            dv.setLabel(varLabel);
1✔
1458
            variableList.add(dv);
1✔
1459

1460
        }
1461

1462
        dataTable.setDataVariables(variableList);
1✔
1463

1464
        ///smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()]));
1465
        ///smd.setVariableLabel(variableLabelMap);
1466
        // TODO: 
1467
        // figure out what to do with the missing value table!
1468
        // -- 4.0 alpha
1469
        // well, they were used to generate merged summary statistics for 
1470
        // the variable. So need to verify what the DDI import was doing 
1471
        // with them and replicate the same in 4.0.
1472
        // (add appropriate value labels?)
1473
        ///TODO: 4.0 smd.setMissingValueTable(missingValueTable);
1474
        ///smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName);
1475

1476
        dbgLog.fine("sumstat:long case=" + Arrays.deepToString(variableTypelList.toArray()));
1✔
1477

1478
        dbgLog.fine("RT2: OBSwiseTypelList=" + OBSwiseTypelList);
1✔
1479

1480
        dbgLog.fine("decodeRecordType2(): end");
1✔
1481
    }
1✔
1482
    
1483
    void decodeRecordType3and4(BufferedInputStream stream) throws IOException {
1484
        dbgLog.fine("decodeRecordType3and4(): start");
1✔
1485
        Map<String, Map<String, String>> valueLabelTable
1✔
1486
                = new LinkedHashMap<String, Map<String, String>>();
1487

1488
        int safteyCounter = 0;
1✔
1489
        while (true) {
1490
            try {
1491
                if (stream == null) {
1✔
UNCOV
1492
                    throw new IllegalArgumentException("stream == null!");
×
1493
                }
1494
                // this secton may not exit so first check the 4-byte header value
1495
                //if (stream.markSupported()){
1496
                stream.mark(1000);
1✔
1497
                //}
1498
                // 3.0 check the first 4 bytes
1499
                byte[] headerCode = new byte[LENGTH_RECORD_TYPE3_CODE];
1✔
1500

1501
                int nbytes_rt3 = stream.read(headerCode, 0, LENGTH_RECORD_TYPE3_CODE);
1✔
1502
                // to-do check against nbytes
1503
                //printHexDump(headerCode, "RT3 header test");
1504
                ByteBuffer bb_header_code = ByteBuffer.wrap(headerCode,
1✔
1505
                        0, LENGTH_RECORD_TYPE3_CODE);
1506
                if (isLittleEndian) {
1✔
1507
                    bb_header_code.order(ByteOrder.LITTLE_ENDIAN);
1✔
1508
                }
1509

1510
                int intRT3test = bb_header_code.getInt();
1✔
1511
                dbgLog.fine("header test value: RT3=" + intRT3test);
1✔
1512
                if (intRT3test != 3) {
1✔
1513
                    //if (stream.markSupported()){
1514
                    dbgLog.fine("iteration=" + safteyCounter);
1✔
1515

1516
                    // We have encountered a record that's not type 3. This means we've
1517
                    // processed all the type 3/4 record pairs. So we want to rewind
1518
                    // the stream and return -- so that the appropriate record type
1519
                    // reader can be called on it.
1520
                    // But before we return, we need to save all the value labels
1521
                    // we have found:
1522
                    //smd.setValueLabelTable(valueLabelTable);
1523
                    assignValueLabels(valueLabelTable);
1✔
1524

1525
                    stream.reset();
1✔
1526
                    return;
1✔
1527
                    //}
1528
                }
1529
                // 3.1 how many value-label pairs follow
1530
                byte[] number_of_labels = new byte[LENGTH_RT3_HOW_MANY_LABELS];
1✔
1531

1532
                int nbytes_3_1 = stream.read(number_of_labels);
1✔
1533
                if (nbytes_3_1 == 0) {
1✔
UNCOV
1534
                    throw new IOException("RT 3: reading recordType3.1: no byte was read");
×
1535
                }
1536
                ByteBuffer bb_number_of_labels = ByteBuffer.wrap(number_of_labels,
1✔
1537
                        0, LENGTH_RT3_HOW_MANY_LABELS);
1538
                if (isLittleEndian) {
1✔
1539
                    bb_number_of_labels.order(ByteOrder.LITTLE_ENDIAN);
1✔
1540
                }
1541

1542
                int numberOfValueLabels = bb_number_of_labels.getInt();
1✔
1543
                dbgLog.fine("number of value-label pairs=" + numberOfValueLabels);
1✔
1544

1545
                ByteBuffer[] tempBB = new ByteBuffer[numberOfValueLabels];
1✔
1546

1547
                String valueLabel[] = new String[numberOfValueLabels];
1✔
1548

1549
                for (int i = 0; i < numberOfValueLabels; i++) {
1✔
1550

1551
                    // read 8-byte as value                    
1552
                    byte[] value = new byte[LENGTH_RT3_VALUE];
1✔
1553
                    int nbytes_3_value = stream.read(value);
1✔
1554

1555
                    if (nbytes_3_value == 0) {
1✔
UNCOV
1556
                        throw new IOException("RT 3: reading recordType3 value: no byte was read");
×
1557
                    }
1558
                    // note these 8 bytes are interpreted later
1559
                    // currently no information about which variable's (=> type unknown)
1560
                    ByteBuffer bb_value = ByteBuffer.wrap(value,
1✔
1561
                            0, LENGTH_RT3_VALUE);
1562
                    if (isLittleEndian) {
1✔
1563
                        bb_value.order(ByteOrder.LITTLE_ENDIAN);
1✔
1564
                    }
1565
                    tempBB[i] = bb_value;
1✔
1566
                    dbgLog.fine("bb_value=" + Hex.encodeHex(bb_value.array()));
1✔
1567
                    /*
1568
                     double valueD = bb_value.getDouble();                
1569
                     dbgLog.fine("value="+valueD);
1570
                     */
1571
                    // read 1st byte as unsigned integer = label_length
1572

1573
                    // read label_length byte as label
1574
                    byte[] labelLengthByte = new byte[LENGTH_RT3_LABEL_LENGTH];
1✔
1575

1576
                    int nbytes_3_label_length = stream.read(labelLengthByte);
1✔
1577

1578
                    // add check-routine here
1579
                    dbgLog.fine("labelLengthByte" + Hex.encodeHex(labelLengthByte));
1✔
1580
                    dbgLog.fine("label length = " + labelLengthByte[0]);
1✔
1581
                    // the net-length of a value label is saved as
1582
                    // unsigned byte; however, the length is less than 127
1583
                    // byte should be ok
1584
                    int rawLabelLength = labelLengthByte[0] & 0xFF;
1✔
1585
                    dbgLog.fine("rawLabelLength=" + rawLabelLength);
1✔
1586
                    // -1 =>1-byte already read
1587
                    int labelLength = getSAVobsAdjustedBlockLength(rawLabelLength + 1) - 1;
1✔
1588
                    byte[] valueLabelBytes = new byte[labelLength];
1✔
1589
                    int nbytes_3_value_label = stream.read(valueLabelBytes);
1✔
1590

1591
                    // ByteBuffer bb_label = ByteBuffer.wrap(valueLabel,0,labelLength);
1592
                    valueLabel[i] = StringUtils.stripEnd(new String(Arrays.copyOfRange(valueLabelBytes, 0, rawLabelLength), defaultCharSet), " ");
1✔
1593
                    dbgLog.fine(i + "-th valueLabel=" + valueLabel[i] + "<-");
1✔
1594

1595
                } // iter rt3
1596

1597
                dbgLog.fine("end of RT3 block");
1✔
1598
                dbgLog.fine("start of RT4 block");
1✔
1599

1600
                // 4.0 check the first 4 bytes
1601
                byte[] headerCode4 = new byte[LENGTH_RECORD_TYPE4_CODE];
1✔
1602

1603
                int nbytes_rt4 = stream.read(headerCode4, 0, LENGTH_RECORD_TYPE4_CODE);
1✔
1604

1605
                if (nbytes_rt4 == 0) {
1✔
UNCOV
1606
                    throw new IOException("RT4: reading recordType4 value: no byte was read");
×
1607
                }
1608

1609
                //printHexDump(headerCode4, "RT4 header test");
1610
                ByteBuffer bb_header_code_4 = ByteBuffer.wrap(headerCode4,
1✔
1611
                        0, LENGTH_RECORD_TYPE4_CODE);
1612
                if (isLittleEndian) {
1✔
1613
                    bb_header_code_4.order(ByteOrder.LITTLE_ENDIAN);
1✔
1614
                }
1615

1616
                int intRT4test = bb_header_code_4.getInt();
1✔
1617
                dbgLog.fine("header test value: RT4=" + intRT4test);
1✔
1618

1619
                if (intRT4test != 4) {
1✔
UNCOV
1620
                    throw new IOException("RT 4: reading recordType4 header: no byte was read");
×
1621
                }
1622

1623
                // 4.1 read the how-many-variables bytes
1624
                byte[] howManyVariablesfollow = new byte[LENGTH_RT4_HOW_MANY_VARIABLES];
1✔
1625

1626
                int nbytes_rt4_1 = stream.read(howManyVariablesfollow, 0, LENGTH_RT4_HOW_MANY_VARIABLES);
1✔
1627

1628
                ByteBuffer bb_howManyVariablesfollow = ByteBuffer.wrap(howManyVariablesfollow,
1✔
1629
                        0, LENGTH_RT4_HOW_MANY_VARIABLES);
1630
                if (isLittleEndian) {
1✔
1631
                    bb_howManyVariablesfollow.order(ByteOrder.LITTLE_ENDIAN);
1✔
1632
                }
1633

1634
                int howManyVariablesRT4 = bb_howManyVariablesfollow.getInt();
1✔
1635
                dbgLog.fine("how many variables follow: RT4=" + howManyVariablesRT4);
1✔
1636

1637
                int length_indicies = LENGTH_RT4_VARIABLE_INDEX * howManyVariablesRT4;
1✔
1638
                byte[] variableIdicesBytes = new byte[length_indicies];
1✔
1639

1640
                int nbytes_rt4_2 = stream.read(variableIdicesBytes, 0, length_indicies);
1✔
1641

1642
                // !!!!! Caution: variableIndex in RT4 starts from 1 NOT ** 0 **
1643
                int[] variableIndex = new int[howManyVariablesRT4];
1✔
1644
                int offset = 0;
1✔
1645
                for (int i = 0; i < howManyVariablesRT4; i++) {
1✔
1646

1647
                    ByteBuffer bb_variable_index = ByteBuffer.wrap(variableIdicesBytes,
1✔
1648
                            offset, LENGTH_RT4_VARIABLE_INDEX);
1649
                    offset += LENGTH_RT4_VARIABLE_INDEX;
1✔
1650

1651
                    if (isLittleEndian) {
1✔
1652
                        bb_variable_index.order(ByteOrder.LITTLE_ENDIAN);
1✔
1653
                    }
1654

1655
                    variableIndex[i] = bb_variable_index.getInt();
1✔
1656
                    dbgLog.fine(i + "-th variable index number=" + variableIndex[i]);
1✔
1657
                }
1658

1659
                dbgLog.fine("variable index set=" + ArrayUtils.toString(variableIndex));
1✔
1660
                dbgLog.fine("subtract 1 from variableIndex for getting a variable info");
1✔
1661

1662
                boolean isNumeric = OBSwiseTypelList.get(variableIndex[0] - 1) == 0 ? true : false;
1✔
1663

1664
                Map<String, String> valueLabelPair = new LinkedHashMap<String, String>();
1✔
1665
                if (isNumeric) {
1✔
1666
                    // numeric variable
1667
                    dbgLog.fine("processing of a numeric value-label table");
1✔
1668
                    for (int j = 0; j < numberOfValueLabels; j++) {
1✔
1669
                        valueLabelPair.put(doubleNumberFormatter.format(tempBB[j].getDouble()), valueLabel[j]);
1✔
1670
                    }
1671
                } else {
1672
                    // String variable
1673
                    dbgLog.fine("processing of a string value-label table");
1✔
1674
                    for (int j = 0; j < numberOfValueLabels; j++) {
1✔
1675
                        valueLabelPair.put(
1✔
1676
                                StringUtils.stripEnd(new String((tempBB[j].array()), defaultCharSet), " "), valueLabel[j]);
1✔
1677
                    }
1678
                }
1679

1680
                dbgLog.fine("valueLabePair=" + valueLabelPair);
1✔
1681
                dbgLog.fine("key variable's (raw) index =" + variableIndex[0]);
1✔
1682

1683
                valueLabelTable.put(OBSIndexToVariableName.get(variableIndex[0] - 1), valueLabelPair);
1✔
1684

1685
                dbgLog.fine("valueLabelTable=" + valueLabelTable);
1✔
1686

1687
                // create a mapping table that finds the key variable for this mapping table
1688
                String keyVariableName = OBSIndexToVariableName.get(variableIndex[0] - 1);
1✔
1689
                for (int vn : variableIndex) {
1✔
1690
                    valueVariableMappingTable.put(OBSIndexToVariableName.get(vn - 1), keyVariableName);
1✔
1691
                }
1692

1693
                dbgLog.fine("valueVariableMappingTable:\n" + valueVariableMappingTable);
1✔
UNCOV
1694
            } catch (IOException ex) {
×
1695
                //ex.printStackTrace();
UNCOV
1696
                throw ex;
×
1697
            }
1✔
1698

1699
            safteyCounter++;
1✔
1700
            if (safteyCounter >= 1000000) {
1✔
UNCOV
1701
                break;
×
1702
            }
1703
        } //while
1704

1705
        ///smd.setValueLabelTable(valueLabelTable);
1706
        assignValueLabels(valueLabelTable);
×
1707

UNCOV
1708
        dbgLog.fine("***** decodeRecordType3and4(): end *****");
×
UNCOV
1709
    }
×
1710
   
1711
    void assignValueLabels(Map<String, Map<String, String>> valueLabelTable) {
1712
        // Let's go through all the categorical value label mappings and 
1713
        // assign them to the correct variables: 
1714
        
1715
        for (DataVariable dataVariable : dataTable.getDataVariables()) {
1✔
1716
            String varName = dataVariable.getName();
1✔
1717
            
1718
            Map<String, String> valueLabelPairs = valueLabelTable.get(valueVariableMappingTable.get(varName));
1✔
1719
            if (valueLabelPairs != null && !valueLabelPairs.isEmpty()) {
1✔
1720
                for (String value : valueLabelPairs.keySet()) {
1✔
1721
                    
1722
                    VariableCategory cat = new VariableCategory();
1✔
1723
                    cat.setValue(value);
1✔
1724
                    cat.setLabel(valueLabelPairs.get(value));
1✔
1725

1726
                    /* cross-link the variable and category to each other: */
1727
                    cat.setDataVariable(dataVariable);
1✔
1728
                    dataVariable.getCategories().add(cat);
1✔
1729
                }
1✔
1730
            }
1731
        }
1✔
1732
    }
1✔
1733
    
1734

1735
    void decodeRecordType6(BufferedInputStream stream) throws IOException {
1736
        dbgLog.fine("***** decodeRecordType6(): start *****");
1✔
1737
        try {
1738
            if (stream ==null){
1✔
UNCOV
1739
                throw new IllegalArgumentException("stream == null!");
×
1740
            }
1741
            // this section is optional; so let's first check the 4-byte header 
1742
            // value and see what type it is. 
1743
            //if (stream.markSupported()){ // -- ? L.A. 4.0 alpha
1744
            stream.mark(1000);
1✔
1745
            //}
1746
            // 6.0 check the first 4 bytes
1747
            byte[] headerCodeRt6 = new byte[LENGTH_RECORD_TYPE6_CODE];
1✔
1748

1749
            int nbytes_rt6 = stream.read(headerCodeRt6, 0, LENGTH_RECORD_TYPE6_CODE);
1✔
1750
            // to-do check against nbytes
1751
            //printHexDump(headerCodeRt6, "RT6 header test");
1752
            ByteBuffer bb_header_code_rt6  = ByteBuffer.wrap(headerCodeRt6,
1✔
1753
                       0, LENGTH_RECORD_TYPE6_CODE);
1754
            if (isLittleEndian){
1✔
1755
                bb_header_code_rt6.order(ByteOrder.LITTLE_ENDIAN);
1✔
1756
            }
1757

1758
            int intRT6test = bb_header_code_rt6.getInt();
1✔
1759
            dbgLog.fine("RT6: header test value="+intRT6test);
1✔
1760
            if (intRT6test != 6){
1✔
1761
            //if (stream.markSupported()){
1762
                //out.print("iteration="+safteyCounter);
1763
                //dbgLog.fine("iteration="+safteyCounter);
1764
                dbgLog.fine("intRT6test failed="+intRT6test);
1✔
1765
                
1766
                stream.reset();
1✔
1767
                return;
1✔
1768
            //}
1769
            }
1770
            // 6.1 check 4-byte integer that tells how many lines follow
1771
            
UNCOV
1772
            byte[] length_how_many_line_bytes = new byte[LENGTH_RT6_HOW_MANY_LINES];
×
1773

UNCOV
1774
            int nbytes_rt6_1 = stream.read(length_how_many_line_bytes, 0,
×
1775
                LENGTH_RT6_HOW_MANY_LINES);
1776
            // to-do check against nbytes
1777
            
1778
            //printHexDump(length_how_many_line_bytes, "RT6 how_many_line_bytes");
1779
            ByteBuffer bb_how_many_lines = ByteBuffer.wrap(length_how_many_line_bytes,
×
1780
                       0, LENGTH_RT6_HOW_MANY_LINES);
UNCOV
1781
            if (isLittleEndian){
×
1782
                bb_how_many_lines.order(ByteOrder.LITTLE_ENDIAN);
×
1783
            }
1784

UNCOV
1785
            int howManyLinesRt6 = bb_how_many_lines.getInt();
×
1786
            dbgLog.fine("how Many lines follow="+howManyLinesRt6);
×
1787
            
1788
            // 6.2 read 80-char-long lines 
UNCOV
1789
            String[] documentRecord = new String[howManyLinesRt6];
×
1790
            
1791
            for (int i=0;i<howManyLinesRt6; i++){
×
1792
                
1793
                byte[] line = new byte[80];
×
1794
                int nbytes_rt6_line = stream.read(line);
×
1795
               
UNCOV
1796
                documentRecord[i] = StringUtils.stripEnd(new
×
1797
                    String(Arrays.copyOfRange(line,
×
1798
                    0, LENGTH_RT6_DOCUMENT_LINE),defaultCharSet), " ");
1799
                    
UNCOV
1800
                dbgLog.fine(i+"-th line ="+documentRecord[i]+"<-");
×
1801
            }
1802
            dbgLog.fine("documentRecord:\n"+StringUtils.join(documentRecord, "\n"));
×
1803

1804

1805
        } catch (IOException ex){
×
1806
            //ex.printStackTrace();
1807
            throw ex; 
×
1808
        }
×
1809
        
UNCOV
1810
        dbgLog.fine("decodeRecordType6(): end");
×
UNCOV
1811
    }
×
1812
    
1813
    
1814
    /*
1815
     * TODO: 
1816
     * Add an explanation note here documenting what "record type 7" is 
1817
     * and what information it stores. This is not obvious from the code
1818
     * below. -- L.A. 4.0 alpha
1819
    */
1820
    void decodeRecordType7(BufferedInputStream stream) throws IOException {
1821
        dbgLog.fine("decodeRecordType7(): start");
1✔
1822
        int counter=0;
1✔
1823
        int[] headerSection = new int[2];
1✔
1824

1825
        // the variables below may no longer needed; 
1826
        // but they may be useful for debugging/logging purposes.
1827

1828
        /// // RecordType 7 
1829
        /// // Subtype 3
1830
        /// List<Integer> releaseMachineSpecificInfo = new ArrayList<Integer>();
1831
        /// List<String> releaseMachineSpecificInfoHex = new ArrayList<String>();
1832
    
1833
        /// // Subytpe 4
1834
        /// Map<String, Double> OBSTypeValue = new LinkedHashMap<String, Double>();
1835
        /// Map<String, String> OBSTypeHexValue = new LinkedHashMap<String, String>();    
1836
        //Subtype 11
1837
        /// List<Integer> measurementLevel = new ArrayList<Integer>();
1838
        /// List<Integer> columnWidth = new ArrayList<Integer>();
1839
        /// List<Integer> alignment = new ArrayList<Integer>();
1840

1841

1842

1843

1844
        while(true){
1845
            try {
1846
                if (stream ==null){
1✔
UNCOV
1847
                    throw new IllegalArgumentException("RT7: stream == null!");
×
1848
                }
1849
                // first check the 4-byte header value
1850
                //if (stream.markSupported()){
1851
                stream.mark(1000);
1✔
1852
                //}
1853
                // 7.0 check the first 4 bytes
1854
                byte[] headerCodeRt7 = new byte[LENGTH_RECORD_TYPE7_CODE];
1✔
1855

1856
                int nbytes_rt7 = stream.read(headerCodeRt7, 0, 
1✔
1857
                                             LENGTH_RECORD_TYPE7_CODE);
1858
                // to-do check against nbytes
1859
                //printHexDump(headerCodeRt7, "RT7 header test");
1860
                ByteBuffer bb_header_code_rt7  = ByteBuffer.wrap(headerCodeRt7,
1✔
1861
                                                                 0, LENGTH_RECORD_TYPE7_CODE);
1862
                if (isLittleEndian){
1✔
1863
                    bb_header_code_rt7.order(ByteOrder.LITTLE_ENDIAN);
1✔
1864
                }
1865

1866
                int intRT7test = bb_header_code_rt7.getInt();
1✔
1867
                dbgLog.fine("RT7: header test value="+intRT7test);
1✔
1868
                if (intRT7test != 7){
1✔
1869
                    //if (stream.markSupported()){
1870
                    //out.print("iteration="+safteyCounter);
1871
                    //dbgLog.fine("iteration="+safteyCounter);
1872
                    dbgLog.fine("intRT7test failed="+intRT7test);
1✔
1873
                    dbgLog.fine("counter="+counter);
1✔
1874
                    stream.reset();
1✔
1875
                    return;
1✔
1876
                    //}
1877
                }
1878
            
1879
                // 7.1 check 4-byte integer Sub-Type Code
1880
            
1881
                byte[] length_sub_type_code = new byte[LENGTH_RT7_SUB_TYPE_CODE];
1✔
1882

1883
                int nbytes_rt7_1 = stream.read(length_sub_type_code, 0,
1✔
1884
                                               LENGTH_RT7_SUB_TYPE_CODE);
1885
                // to-do check against nbytes
1886
                
1887
                //printHexDump(length_how_many_line_bytes, "RT7 how_many_line_bytes");
1888
                ByteBuffer bb_sub_type_code = ByteBuffer.wrap(length_sub_type_code,
1✔
1889
                                                              0, LENGTH_RT7_SUB_TYPE_CODE);
1890
                if (isLittleEndian){
1✔
1891
                    bb_sub_type_code.order(ByteOrder.LITTLE_ENDIAN);
1✔
1892
                }
1893
                
1894
                int subTypeCode = bb_sub_type_code.getInt();
1✔
1895
                dbgLog.fine("RT7: subTypeCode="+subTypeCode);
1✔
1896
                
1897
            
1898
                switch (subTypeCode) {
1✔
1899
                case 3:
1900
                    // 3: Release andMachine-Specific Integer Information
1901
                    
1902
                    //parseRT7SubTypefield(stream);
1903
                    
1904
                    
1905
                    headerSection = parseRT7SubTypefieldHeader(stream);
1✔
1906
                    if (headerSection != null){
1✔
1907
                        int unitLength = headerSection[0];
1✔
1908
                        int numberOfUnits = headerSection[1];
1✔
1909
                        
1910
                        
1911
                        for (int i=0; i<numberOfUnits; i++){
1✔
1912
                            dbgLog.finer(i+"-th fieldData");
1✔
1913
                            byte[] work = new byte[unitLength];
1✔
1914

1915
                            int nb = stream.read(work);
1✔
1916
                            dbgLog.finer("raw bytes in Hex:"+ new String(Hex.encodeHex(work)));
1✔
1917
                            ByteBuffer bb_field = ByteBuffer.wrap(work);
1✔
1918
                            if (isLittleEndian){
1✔
1919
                                bb_field.order(ByteOrder.LITTLE_ENDIAN);
1✔
1920
                            }
1921
                            String dataInHex = new String(Hex.encodeHex(bb_field.array()));
1✔
1922
                            /// releaseMachineSpecificInfoHex.add(dataInHex);
1923
                            
1924
                            dbgLog.finer("raw bytes in Hex:"+ dataInHex);
1✔
1925
                            if (unitLength==4){
1✔
1926
                                int fieldData = bb_field.getInt();
1✔
1927
                                dbgLog.finer("fieldData(int)="+fieldData);
1✔
1928
                                dbgLog.finer("fieldData in Hex=0x"+Integer.toHexString(fieldData));
1✔
1929
                                /// releaseMachineSpecificInfo.add(fieldData);
1930
                            }
1931
                            
1932
                        }
1933
                       
1934
                        /// dbgLog.fine("releaseMachineSpecificInfo="+releaseMachineSpecificInfo);
1935
                        /// dbgLog.fine("releaseMachineSpecificInfoHex="+releaseMachineSpecificInfoHex);
1936
                        
1937
                    } else {
1938
                        // throw new IOException
1939
                    }
1940
                    
1941
                    
1942
                    dbgLog.fine("***** end of subType 3 ***** \n");
1✔
1943
                    
1944
                    break;
1✔
1945
                case 4: 
1946
                    // Release andMachine-SpecificOBS-Type Information
1947
                    headerSection = parseRT7SubTypefieldHeader(stream);
1✔
1948
                    if (headerSection != null){
1✔
1949
                        int unitLength = headerSection[0];
1✔
1950
                        int numberOfUnits = headerSection[1];
1✔
1951

1952

1953
                        for (int i=0; i<numberOfUnits; i++){
1✔
1954
                            dbgLog.finer(i+"-th fieldData:"+RecordType7SubType4Fields.get(i));
1✔
1955
                            byte[] work = new byte[unitLength];
1✔
1956

1957
                            int nb = stream.read(work);
1✔
1958

1959
                            dbgLog.finer("raw bytes in Hex:"+ new String(Hex.encodeHex(work)));
1✔
1960
                            ByteBuffer bb_field = ByteBuffer.wrap(work);
1✔
1961
                            dbgLog.finer("byte order="+bb_field.order().toString());
1✔
1962
                            if (isLittleEndian){
1✔
1963
                                bb_field.order(ByteOrder.LITTLE_ENDIAN);
1✔
1964
                            }
1965
                            ByteBuffer bb_field_dup = bb_field.duplicate();
1✔
1966
                            OBSTypeHexValue.put(RecordType7SubType4Fields.get(i),
1✔
1967
                                new String(Hex.encodeHex(bb_field.array())) );
1✔
1968
//                            dbgLog.finer("raw bytes in Hex:"+
1969
//                                OBSTypeHexValue.get(RecordType7SubType4Fields.get(i)));
1970
                            if (unitLength==8){
1✔
1971
                                double fieldData = bb_field.getDouble();
1✔
1972
                                /// OBSTypeValue.put(RecordType7SubType4Fields.get(i), fieldData);
1973
                                dbgLog.finer("fieldData(double)="+fieldData);
1✔
1974
                                OBSTypeHexValue.put(RecordType7SubType4Fields.get(i),
1✔
1975
                                                    Double.toHexString(fieldData));
1✔
1976
                                dbgLog.fine("fieldData in Hex="+Double.toHexString(fieldData));
1✔
1977
                            }
1978
                        }
1979
                        /// dbgLog.fine("OBSTypeValue="+OBSTypeValue);
1980
                        /// dbgLog.fine("OBSTypeHexValue="+OBSTypeHexValue);
1981

1982
                    } else {
1983
                        // throw new IOException
1984
                    }
1985
                    
1986

1987
                    dbgLog.fine("***** end of subType 4 ***** \n");
1✔
1988
                    break;
1✔
1989
                case 5:
1990
                    // Variable Sets Information
UNCOV
1991
                    parseRT7SubTypefield(stream);
×
1992
                    break;
×
1993
                case 6:
1994
                    // Trends date information
UNCOV
1995
                    parseRT7SubTypefield(stream);
×
1996
                    break;
×
1997
                case 7:
1998
                    // Multiple response groups
UNCOV
1999
                    parseRT7SubTypefield(stream);
×
2000
                    break;
×
2001
                case 8:
2002
                    // Windows Data Entry data
UNCOV
2003
                    parseRT7SubTypefield(stream);
×
2004
                    break;
×
2005
                case 9:
2006
                    //
UNCOV
2007
                    parseRT7SubTypefield(stream);
×
2008
                    break;
×
2009
                case 10:
2010
                    // TextSmart data
UNCOV
2011
                    parseRT7SubTypefield(stream);
×
UNCOV
2012
                    break;
×
2013
                case 11:
2014
                    // Msmt level, col width, & alignment
2015
                    //parseRT7SubTypefield(stream);
2016

2017
                    headerSection = parseRT7SubTypefieldHeader(stream);
1✔
2018
                    if (headerSection != null){
1✔
2019
                        int unitLength = headerSection[0];
1✔
2020
                        int numberOfUnits = headerSection[1];
1✔
2021

2022
                        for (int i=0; i<numberOfUnits; i++){
1✔
2023
                            dbgLog.finer(i+"-th fieldData");
1✔
2024
                            byte[] work = new byte[unitLength];
1✔
2025

2026
                            int nb = stream.read(work);
1✔
2027
                            dbgLog.finer("raw bytes in Hex:"+ new String(Hex.encodeHex(work)));
1✔
2028
                            ByteBuffer bb_field = ByteBuffer.wrap(work);
1✔
2029
                            if (isLittleEndian){
1✔
2030
                                bb_field.order(ByteOrder.LITTLE_ENDIAN);
1✔
2031
                            }
2032
                            dbgLog.finer("raw bytes in Hex:"+ new String(Hex.encodeHex(bb_field.array())));
1✔
2033
                            
2034
                            if (unitLength==4){
1✔
2035
                                int fieldData = bb_field.getInt();
1✔
2036
                                dbgLog.finer("fieldData(int)="+fieldData);
1✔
2037
                                dbgLog.finer("fieldData in Hex=0x"+Integer.toHexString(fieldData));
1✔
2038
                                
2039
                                int remainder = i%3;
1✔
2040
                                dbgLog.finer("remainder="+remainder);
1✔
2041
                                if (remainder == 0){
1✔
2042
                                    /// measurementLevel.add(fieldData);
2043
                                } else if (remainder == 1){
1✔
2044
                                    /// columnWidth.add(fieldData);
2045
                                } else if (remainder == 2){
1✔
2046
                                    /// alignment.add(fieldData);
2047
                                }
2048
                            }
2049

2050
                        }
2051

2052
                    } else {
2053
                        // throw new IOException
2054
                    }
2055
                    /// dbgLog.fine("measurementLevel="+measurementLevel);
2056
                    /// dbgLog.fine("columnWidth="+columnWidth);
2057
                    /// dbgLog.fine("alignment="+alignment);
2058
                    dbgLog.fine("end of subType 11\n");
1✔
2059

2060
                    break;
1✔
2061
                case 12:
2062
                    // Windows Data Entry GUID
UNCOV
2063
                    parseRT7SubTypefield(stream);
×
UNCOV
2064
                    break;
×
2065
                case 13:
2066
                    // Extended variable names
2067
                    // parseRT7SubTypefield(stream);
2068
                    headerSection = parseRT7SubTypefieldHeader(stream);
1✔
2069

2070
                    if (headerSection != null){
1✔
2071
                        int unitLength = headerSection[0];
1✔
2072
                        dbgLog.fine("RT7: unitLength="+unitLength);
1✔
2073
                        int numberOfUnits = headerSection[1];
1✔
2074
                        dbgLog.fine("RT7: numberOfUnits="+numberOfUnits);
1✔
2075
                        byte[] work = new byte[unitLength*numberOfUnits];
1✔
2076
                        int nbtyes13 = stream.read(work);
1✔
2077

2078
                        String[] variableShortLongNamePairs = new String(work,"US-ASCII").split("\t");
1✔
2079

2080
                        for (int i=0; i<variableShortLongNamePairs.length; i++){
1✔
2081
                            dbgLog.fine("RT7: "+i+"-th pair"+variableShortLongNamePairs[i]);
1✔
2082
                            String[] pair = variableShortLongNamePairs[i].split("=");
1✔
2083
                            shortToLongVariableNameTable.put(pair[0], pair[1]);
1✔
2084
                        }
2085

2086
                        dbgLog.fine("RT7: shortToLongVarialbeNameTable"+
1✔
2087
                                shortToLongVariableNameTable);
2088
                        // We are saving the short-to-long name map; at the
2089
                        // end of ingest, we'll go through the data variables and
2090
                        // change the names accordingly. 
2091
                        
2092
                        // smd.setShortToLongVarialbeNameTable(shortToLongVarialbeNameTable);
2093
                    } else {
1✔
2094
                        // throw new IOException
2095
                    }
2096

2097
                    break;
2098
                case 14:
2099
                    // Extended strings
2100
                    //parseRT7SubTypefield(stream);
2101
                    headerSection = parseRT7SubTypefieldHeader(stream);
×
2102

2103
                    if (headerSection != null){
×
2104
                        int unitLength = headerSection[0];
×
2105
                        dbgLog.fine("RT7.14: unitLength="+unitLength);
×
2106
                        int numberOfUnits = headerSection[1];
×
UNCOV
2107
                        dbgLog.fine("RT7.14: numberOfUnits="+numberOfUnits);
×
2108
                        byte[] work = new byte[unitLength*numberOfUnits];
×
UNCOV
2109
                        int nbtyes13 = stream.read(work);
×
2110

2111
                        String[] extendedVariablesSizePairs = new String(work,defaultCharSet).split("\000\t");
×
2112

2113
                        for (int i=0; i<extendedVariablesSizePairs.length; i++){
×
2114
                            dbgLog.fine("RT7.14: "+i+"-th pair"+extendedVariablesSizePairs[i]);
×
UNCOV
2115
                            if ( extendedVariablesSizePairs[i].indexOf("=") > 0 ) {
×
UNCOV
2116
                                String[] pair = extendedVariablesSizePairs[i].split("=");
×
UNCOV
2117
                                extendedVariablesSizeTable.put(pair[0], Integer.valueOf(pair[1]));
×
2118
                            }
2119
                        }
2120

UNCOV
2121
                        dbgLog.fine("RT7.14: extendedVariablesSizeTable"+
×
2122
                                extendedVariablesSizeTable);
UNCOV
2123
                    } else {
×
2124
                        // throw new IOException
2125
                    }
2126

2127
                    break;
2128
                case 15:
2129
                    // Clementine Metadata
UNCOV
2130
                    parseRT7SubTypefield(stream);
×
UNCOV
2131
                    break;
×
2132
                case 16:
2133
                    // 64 bit N of cases
2134
                    parseRT7SubTypefield(stream);
1✔
2135
                    break;
1✔
2136
                case 17:
2137
                    // File level attributes
UNCOV
2138
                    parseRT7SubTypefield(stream);
×
UNCOV
2139
                    break;
×
2140
                case 18:
2141
                    // Variable attributes
2142
                    parseRT7SubTypefield(stream);
1✔
2143
                    break;
1✔
2144
                case 19:
2145
                    // Extended multiple response groups
UNCOV
2146
                    parseRT7SubTypefield(stream);
×
UNCOV
2147
                    break;
×
2148
                case 20:
2149
                    // Character encoding, aka code page.
2150
                    // Must be a version 16+ feature (?).
2151
                    // Starting v.16, the default character encoding for SAV
2152
                    // files is UTF-8; but then it is possible to specify an 
2153
                    // alternative encoding here. 
2154
                    // A typical use case would be people setting it to "ISO-Latin" 
2155
                    // or "windows-1252", or a similar 8-bit encoding to store 
2156
                    // text with standard Western European accents.
2157
                    // -- L.A.
2158
                    
2159
                    headerSection = parseRT7SubTypefieldHeader(stream);
1✔
2160

2161
                    if (headerSection != null){
1✔
2162
                        int unitLength = headerSection[0];
1✔
2163
                        dbgLog.fine("RT7-20: unitLength="+unitLength);
1✔
2164
                        int numberOfUnits = headerSection[1];
1✔
2165
                        dbgLog.fine("RT7-20: numberOfUnits="+numberOfUnits);
1✔
2166
                        byte[] rt7st20bytes = new byte[unitLength*numberOfUnits];
1✔
2167
                        int nbytes20 = stream.read(rt7st20bytes);
1✔
2168

2169
                        String dataCharSet = new String(rt7st20bytes,"US-ASCII");
1✔
2170

2171
                        if (dataCharSet != null && !(dataCharSet.equals(""))) {
1✔
2172
                            dbgLog.fine("RT7-20: data charset: "+ dataCharSet);
1✔
2173
                            defaultCharSet = dataCharSet; 
1✔
2174
                        }
2175
                    } /*else {
1✔
2176
                        // TODO: 
2177
                        // decide if the exception should actually be thrown here!
2178
                        // -- L.A. 4.0 beta
2179
                        // throw new IOException
2180
                    }*/
2181
                     
2182

2183
                    break;
2184
                case 21:
2185
                    // Value labels for long strings
UNCOV
2186
                    parseRT7SubTypefield(stream);
×
2187
                    break;
×
2188
                case 22:
2189
                    // Missing values for long strings
2190
                    parseRT7SubTypefield(stream);
×
UNCOV
2191
                    break;
×
2192
                default:
2193
                    parseRT7SubTypefield(stream);
×
2194
            }
2195

UNCOV
2196
        } catch (IOException ex){
×
2197
            //ex.printStackTrace();
UNCOV
2198
            throw ex; 
×
2199
        }
1✔
2200

2201
        counter++;
1✔
2202

2203
        if (counter > 20){
1✔
UNCOV
2204
            break;
×
2205
        }
2206
    }
2207

UNCOV
2208
    dbgLog.fine("RT7: counter="+counter);
×
UNCOV
2209
        dbgLog.fine("RT7: decodeRecordType7(): end");
×
UNCOV
2210
    }
×
2211
    
2212
    
2213
    void decodeRecordType999(BufferedInputStream stream) throws IOException {
2214
        dbgLog.fine("decodeRecordType999(): start");
1✔
2215
        try {
2216
            if (stream ==null){
1✔
UNCOV
2217
                throw new IllegalArgumentException("RT999: stream == null!");
×
2218
            }
2219
            // first check the 4-byte header value
2220
            //if (stream.markSupported()){
2221
            stream.mark(1000);
1✔
2222
            //}
2223
            // 999.0 check the first 4 bytes
2224
            byte[] headerCodeRt999 = new byte[LENGTH_RECORD_TYPE999_CODE];
1✔
2225

2226
            //dbgLog.fine("RT999: stream position="+stream.pos);
2227

2228
            int nbytes_rt999 = stream.read(headerCodeRt999, 0, 
1✔
2229
                LENGTH_RECORD_TYPE999_CODE);
2230
            // to-do check against nbytes
2231
            //printHexDump(headerCodeRt999, "RT999 header test");
2232
            ByteBuffer bb_header_code_rt999  = ByteBuffer.wrap(headerCodeRt999,
1✔
2233
                       0, LENGTH_RECORD_TYPE999_CODE);
2234
            if (isLittleEndian){
1✔
2235
                bb_header_code_rt999.order(ByteOrder.LITTLE_ENDIAN);
1✔
2236
            }
2237

2238
            int intRT999test = bb_header_code_rt999.getInt();
1✔
2239
            dbgLog.fine("header test value: RT999="+intRT999test);
1✔
2240
            if (intRT999test != 999){
1✔
2241
            //if (stream.markSupported()){
UNCOV
2242
                dbgLog.fine("intRT999test failed="+intRT999test);
×
UNCOV
2243
                stream.reset();
×
UNCOV
2244
               throw new IOException("RT999:Header value(999) was not correctly detected:"+intRT999test);
×
2245
            //}
2246
            }
2247
            
2248
            
2249
            
2250
            // 999.1 check 4-byte integer Filler block
2251
            
2252
            byte[] length_filler = new byte[LENGTH_RT999_FILLER];
1✔
2253

2254
            int nbytes_rt999_1 = stream.read(length_filler, 0,
1✔
2255
                LENGTH_RT999_FILLER);
2256
            // to-do check against nbytes
2257
            
2258
            //printHexDump(length_how_many_line_bytes, "RT999 how_many_line_bytes");
2259
            ByteBuffer bb_filler = ByteBuffer.wrap(length_filler,
1✔
2260
                       0, LENGTH_RT999_FILLER);
2261
            if (isLittleEndian){
1✔
2262
                bb_filler.order(ByteOrder.LITTLE_ENDIAN);
1✔
2263
            }
2264

2265
            int rt999filler = bb_filler.getInt();
1✔
2266
            dbgLog.fine("rt999filler="+rt999filler);
1✔
2267
            
2268
            if (rt999filler == 0){
1✔
2269
                dbgLog.fine("the end of the dictionary section");
1✔
2270
            } else {
UNCOV
2271
                throw new IOException("RT999: failed to detect the end mark(0): value="+rt999filler);
×
2272
            }
2273

2274
            // missing value processing concerning HIGHEST/LOWEST values
2275

2276
            Set<Map.Entry<String,InvalidData>> msvlc = invalidDataTable.entrySet();
1✔
2277
            for (Iterator<Map.Entry<String,InvalidData>> itc = msvlc.iterator(); itc.hasNext();){
1✔
2278
                Map.Entry<String, InvalidData> et = itc.next();
×
UNCOV
2279
                String variable = et.getKey();
×
2280
                dbgLog.fine("variable="+variable);
×
2281
                InvalidData invalidDataInfo = et.getValue();
×
2282

2283
                if (invalidDataInfo.getInvalidRange() != null &&
×
2284
                    !invalidDataInfo.getInvalidRange().isEmpty()){
×
2285
                    if (invalidDataInfo.getInvalidRange().get(0).equals(OBSTypeHexValue.get("LOWEST"))){
×
2286
                        dbgLog.fine("1st value is LOWEST");
×
2287
                        invalidDataInfo.getInvalidRange().set(0, "LOWEST");
×
UNCOV
2288
                    } else if (invalidDataInfo.getInvalidRange().get(1).equals(OBSTypeHexValue.get("HIGHEST"))){
×
UNCOV
2289
                        dbgLog.fine("2nd value is HIGHEST");
×
2290
                        invalidDataInfo.getInvalidRange().set(1,"HIGHEST");
×
2291
                    }
2292
                }
UNCOV
2293
            }
×
2294
            dbgLog.fine("invalidDataTable:\n"+invalidDataTable);
1✔
2295
            // TODO: take care of the invalid data! - add the appropriate 
2296
            // value labels (?) 
2297
            // should it be done here, or at the end of ingest?
2298
            // -- L.A. 4.0 alpha
2299
            ///smd.setInvalidDataTable(invalidDataTable);
2300
        } catch (IOException ex){
×
2301
            //ex.printStackTrace();
2302
            //exit(1);
UNCOV
2303
            throw ex; 
×
2304
        }
1✔
2305
        
2306
        dbgLog.fine("decodeRecordType999(): end");
1✔
2307
    }
1✔
2308
    
2309
    
2310

2311
    void decodeRecordTypeData(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException {
2312
        dbgLog.fine("decodeRecordTypeData(): start");
1✔
2313

2314
        ///String fileUnfValue = null;
2315
        ///String[] unfValues = null;
2316

2317

2318

2319
        if (stream ==null){
1✔
UNCOV
2320
            throw new IllegalArgumentException("stream == null!");
×
2321
        }
2322
        if (isDataSectionCompressed){
1✔
2323
            decodeRecordTypeDataCompressed(stream, storeWithVariableHeader);
1✔
2324
        } else {
UNCOV
2325
            decodeRecordTypeDataUnCompressed(stream, storeWithVariableHeader);
×
2326
        }
2327
            
2328
        /* UNF calculation was here... */
2329
        
2330
        dbgLog.fine("***** decodeRecordTypeData(): end *****");
1✔
2331
    }
1✔
2332

2333
    PrintWriter createOutputWriter (BufferedInputStream stream) throws IOException {
2334
        PrintWriter pwout = null;
1✔
2335
        FileOutputStream fileOutTab = null;
1✔
2336
                
2337
        try {
2338

2339
            // create a File object to save the tab-delimited data file
2340
            File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");
1✔
2341

2342
            String tabDelimitedDataFileName   = tabDelimitedDataFile.getAbsolutePath();
1✔
2343

2344
            // save the temp file name in the metadata object
2345
            ///smd.getFileInformation().put("tabDelimitedDataFileLocation", tabDelimitedDataFileName);
2346
            ingesteddata.setTabDelimitedFile(tabDelimitedDataFile);
1✔
2347

2348
            fileOutTab = new FileOutputStream(tabDelimitedDataFile);
1✔
2349
            
2350
            pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);
1✔
2351

2352
        } catch (FileNotFoundException ex) {
×
2353
            ex.printStackTrace();
×
UNCOV
2354
        } catch (UnsupportedEncodingException ex) {
×
2355
            ex.printStackTrace();
×
UNCOV
2356
        } catch (IOException ex){
×
2357
            //ex.printStackTrace();
UNCOV
2358
            throw ex; 
×
2359
        }
1✔
2360

2361
        return pwout;
1✔
2362

2363
    }
2364

2365
    void decodeRecordTypeDataCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException {
2366

2367
        dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****");
1✔
2368

2369
        if (stream == null) {
1✔
UNCOV
2370
            throw new IllegalArgumentException("decodeRecordTypeDataCompressed: stream == null!");
×
2371
        }
2372

2373
        PrintWriter pwout = createOutputWriter(stream);
1✔
2374

2375
        int varQnty = dataTable.getVarQuantity().intValue();
1✔
2376
        int caseQnty = dataTable.getCaseQuantity().intValue();
1✔
2377

2378
        dbgLog.fine("varQnty: " + varQnty);
1✔
2379

2380
        dateFormatList = new String[varQnty];
1✔
2381

2382
        boolean hasStringVarContinuousBlock =
2383
                obsNonVariableBlockSet.size() > 0 ? true : false;
1✔
2384
        dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);
1✔
2385

2386
        int ii = 0;
1✔
2387

2388
        int OBS = LENGTH_SAV_OBS_BLOCK;
1✔
2389
        int nOBS = OBSUnitsPerCase;
1✔
2390

2391
        dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);
1✔
2392

2393
        int caseIndex = 0;
1✔
2394

2395
        dbgLog.fine("printFormatTable:\n" + printFormatTable);
1✔
2396
        variableFormatTypeList = new String[varQnty];
1✔
2397

2398
        // write the variable header out, if instructed to do so
2399
        if (storeWithVariableHeader) {
1✔
UNCOV
2400
            pwout.println(generateVariableHeader(dataTable.getDataVariables()));
×
2401
        }
2402

2403
        for (int i = 0; i < varQnty; i++) {
1✔
2404
            variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE.get(
1✔
2405
                    printFormatTable.get(variableNameList.get(i)));
1✔
2406
            dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
1✔
2407
            formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
1✔
2408
        }
2409
        dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
1✔
2410
        dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);
1✔
2411

2412
        // TODO: 
2413
        // Make sure the date formats are actually preserved! 
2414
        // (this is something that was collected in the code below and passed
2415
        // to the UNF calculator). 
2416
        // -- L.A. 4.0 alpha
2417
        List<String> casewiseRecordForTabFile = new ArrayList<String>();
1✔
2418

2419
        try {
2420
            // this compression is applied only to non-float data, i.e. integer;
2421
            // 8-byte float datum is kept intact
2422
            boolean hasReachedEOF = false;
1✔
2423

2424
            OBSERVATION:
2425
            while (true) {
2426

2427
                dbgLog.fine("SAV Reader: compressed: ii=" + ii + "-th iteration");
1✔
2428

2429
                byte[] octate = new byte[LENGTH_SAV_OBS_BLOCK];
1✔
2430

2431
                int nbytes = stream.read(octate);
1✔
2432

2433
                // processCompressedOBSblock ()
2434

2435
                // (this means process a block of 8 compressed OBS
2436
                // values -- should result in 64 bytes of data total)
2437

2438
                for (int i = 0; i < LENGTH_SAV_OBS_BLOCK; i++) {
1✔
2439

2440

2441
                    dbgLog.finer("i=" + i + "-th iteration");
1✔
2442
                    int octate_i = octate[i];
1✔
2443
                    //dbgLog.fine("octate="+octate_i);
2444
                    if (octate_i < 0) {
1✔
2445
                        octate_i += 256;
1✔
2446
                    }
2447
                    int byteCode = octate_i;//octate_i & 0xF;
1✔
2448
                    //out.println("byeCode="+byteCode);
2449

2450
                    // processCompressedOBS
2451

2452
                    switch (byteCode) {
1✔
2453
                        case 252:
2454
                            // end of the file
UNCOV
2455
                            dbgLog.fine("SAV Reader: compressed: end of file mark [FC] was found");
×
UNCOV
2456
                            hasReachedEOF = true;
×
UNCOV
2457
                            break;
×
2458
                        case 253:
2459
                            // FD: uncompressed data follows after this octate
2460
                            // long string datum or float datum
2461
                            // read the following octate
2462
                            byte[] uncompressedByte = new byte[LENGTH_SAV_OBS_BLOCK];
1✔
2463
                            int ucbytes = stream.read(uncompressedByte);
1✔
2464
                            int typeIndex = (ii * OBS + i) % nOBS;
1✔
2465

2466
                            if ((OBSwiseTypelList.get(typeIndex) > 0) ||
1✔
UNCOV
2467
                                    (OBSwiseTypelList.get(typeIndex) == -1)) {
×
2468
                                // code= >0 |-1: string or its contiguous block
2469
                                // decode as a string object
2470
                                String strdatum = new String(
1✔
2471
                                        Arrays.copyOfRange(uncompressedByte,
1✔
2472
                                        0, LENGTH_SAV_OBS_BLOCK), defaultCharSet);
2473
                                //out.println("str_datum="+strdatum+"<-");
2474
                                // add this non-missing-value string datum
2475
                                casewiseRecordForTabFile.add(strdatum);
1✔
2476
                            //out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile);
2477
                            } else if (OBSwiseTypelList.get(typeIndex) == -2) {
1✔
2478
                                String strdatum = new String(
×
UNCOV
2479
                                        Arrays.copyOfRange(uncompressedByte,
×
2480
                                        0, LENGTH_SAV_OBS_BLOCK - 1), defaultCharSet);
UNCOV
2481
                                casewiseRecordForTabFile.add(strdatum);
×
2482
                            //out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile);
2483
                            } else if (OBSwiseTypelList.get(typeIndex) == 0) {
×
2484
                                // code= 0: numeric
2485

2486
                                ByteBuffer bb_double = ByteBuffer.wrap(
×
2487
                                        uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK);
UNCOV
2488
                                if (isLittleEndian) {
×
2489
                                    bb_double.order(ByteOrder.LITTLE_ENDIAN);
×
2490
                                }
2491

2492
                                Double ddatum = bb_double.getDouble();
×
2493
                                // out.println("ddatum="+ddatum);
2494
                                // add this non-missing-value numeric datum
2495
                                casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
×
2496
                                dbgLog.fine("SAV Reader: compressed: added value to dataLine: " + ddatum);
×
2497

UNCOV
2498
                            } else {
×
UNCOV
2499
                                dbgLog.fine("SAV Reader: out-of-range exception");
×
UNCOV
2500
                                throw new IOException("out-of-range value was found");
×
2501
                            }
2502

2503
                            /*
2504
                            // EOF-check after reading this octate
2505
                            if (stream.available() == 0){
2506
                            hasReachedEOF = true;
2507
                            dbgLog.fine(
2508
                            "SAV Reader: *** After reading an uncompressed octate," +
2509
                            " reached the end of the file at "+ii
2510
                            +"th iteration and i="+i+"th octate position [0-start] *****");
2511
                            }
2512
                             */
2513

2514

2515
                            break;
2516
                        case 254:
2517
                            // FE: used as the missing value for string variables
2518
                            // an empty case in a string variable also takes this value
2519
                            // string variable does not accept space-only data
2520
                            // cf: uncompressed case
2521
                            // 20 20 20 20 20 20 20 20
2522
                            // add the string missing value
2523
                            // out.println("254: String missing data");
2524

UNCOV
2525
                            casewiseRecordForTabFile.add(" ");  // add "." here?
×
2526

2527

2528
                            // Note that technically this byte flag (254/xFE) means
2529
                            // that *eight* white space characters should be
2530
                            // written to the output stream. This caused me
2531
                            // a great amount of confusion, because it appeared
2532
                            // to me that there was a mismatch between the number
2533
                            // of bytes advertised in the variable metadata and
2534
                            // the number of bytes actually found in the data
2535
                            // section of a compressed SAV file; this is because
2536
                            // these 8 bytes "come out of nowhere"; they are not
2537
                            // written in the data section, but this flag specifies
2538
                            // that they should be added to the output.
2539
                            // Also, as I pointed out above, we are only writing
2540
                            // out one whitespace character, not 8 as instructed.
2541
                            // This appears to be legit; these blocks of 8 spaces
2542
                            // seem to be only used for padding, and all such
2543
                            // multiple padding spaces are stripped anyway during
2544
                            // the post-processing.
2545

2546

UNCOV
2547
                            break;
×
2548
                        case 255:
2549
                            // FF: system missing value for numeric variables
2550
                            // cf: uncompressed case (sysmis)
2551
                            // FF FF FF FF FF FF eF FF(little endian)
2552
                            // add the numeric missing value
2553
                            dbgLog.fine("SAV Reader: compressed: Missing Value, numeric");
×
UNCOV
2554
                            casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);
×
2555

2556
                            break;
×
2557
                        case 0:
2558
                            // 00: do nothing
UNCOV
2559
                            dbgLog.fine("SAV Reader: compressed: doing nothing (zero); ");
×
2560

UNCOV
2561
                            break;
×
2562
                        default:
2563
                            //out.println("byte code(default)="+ byteCode);
2564
                            if ((byteCode > 0) && (byteCode < 252)) {
1✔
2565
                                // datum is compressed
2566
                                //Integer unCompressed = Integer.valueOf(byteCode -100);
2567
                                // add this uncompressed numeric datum
2568
                                Double unCompressed = Double.valueOf(byteCode - 100);
1✔
2569
                                dbgLog.fine("SAV Reader: compressed: default case: " + unCompressed);
1✔
2570

2571
                                casewiseRecordForTabFile.add(doubleNumberFormatter.format(unCompressed));
1✔
2572
                            // out.println("uncompressed="+unCompressed);
2573
                            // out.println("dataline="+casewiseRecordForTabFile);
2574
                            }
2575
                    }// end of switch
2576

2577
                    // out.println("end of switch");
2578

2579

2580
                    // The-end-of-a-case(row)-processing
2581

2582
                    // this line that follows, and the code around it
2583
                    // is really confusing:
2584
                    int varCounter = (ii * OBS + i + 1) % nOBS;
1✔
2585
                    // while both OBS and LENGTH_SAV_OBS_BLOCK = 8
2586
                    // (OBS was initialized as OBS=LENGTH_SAV_OBS_BLOCK),
2587
                    // the 2 values mean different things:
2588
                    // LENGTH_SAV_OBS_BLOCK is the number of bytes in one OBS;
2589
                    // and OBS is the number of OBS blocks that we process
2590
                    // at a time. I.e., we process 8 chunks of 8 bytes at a time.
2591
                    // This is how data is organized inside an SAV file:
2592
                    // 8 bytes of compression flags, followed by 8x8 or fewer
2593
                    // (depending on the flags) bytes of compressed data.
2594
                    // I should rename this OBS variable something more
2595
                    // meaningful.
2596
                    //
2597
                    // Also, the "varCounter" variable name is entirely
2598
                    // misleading -- it counts not variables, but OBS blocks.
2599

2600
                    dbgLog.fine("SAV Reader: compressed: OBS counter=" + varCounter + "(ii=" + ii + ")");
1✔
2601

2602
                    if ((ii * OBS + i + 1) % nOBS == 0) {
1✔
2603

2604
                        //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
2605

2606
                        // out.println("all variables in a case are parsed == nOBS");
2607
                        // out.println("hasStringVarContinuousBlock="+hasStringVarContinuousBlock);
2608

2609
                        // check whether a string-variable's continuous block exists
2610
                        // if so, they must be joined
2611

2612
                        if (hasStringVarContinuousBlock) {
1✔
2613

2614
                            // string-variable's continuous-block-concatenating-processing
2615

2616
                            //out.println("concatenating process starts");
2617
                            //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
2618
                            //out.println("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());
2619

2620
                            StringBuilder sb = new StringBuilder("");
×
2621
                            int firstPosition = 0;
×
2622

2623
                            Set<Integer> removeJset = new HashSet<Integer>();
×
2624
                            for (int j = 0; j < nOBS; j++) {
×
UNCOV
2625
                                dbgLog.fine("RTD: j=" + j + "-th type =" + OBSwiseTypelList.get(j));
×
UNCOV
2626
                                if ((OBSwiseTypelList.get(j) == -1) ||
×
2627
                                        (OBSwiseTypelList.get(j) == -2)) {
×
2628
                                    // Continued String variable found at j-th
2629
                                    // position. look back the j-1
UNCOV
2630
                                    firstPosition = j - 1;
×
2631
                                    int lastJ = j;
×
2632
                                    String concatenated = null;
×
2633

UNCOV
2634
                                    removeJset.add(j);
×
2635
                                    sb.append(casewiseRecordForTabFile.get(j - 1));
×
2636
                                    sb.append(casewiseRecordForTabFile.get(j));
×
2637
                                    
2638
                                    for (int jc = 1; ; jc++) {
×
UNCOV
2639
                                        if ((j + jc == nOBS) 
×
UNCOV
2640
                                            || ((OBSwiseTypelList.get(j + jc) != -1) 
×
2641
                                                && (OBSwiseTypelList.get(j + jc) != -2))) {
×
2642

2643
                                            // j is the end unit of this string variable
2644
                                            concatenated = sb.toString();
×
UNCOV
2645
                                            sb.setLength(0);
×
2646
                                            lastJ = j + jc;
×
2647
                                            break;
×
2648
                                        } else {
UNCOV
2649
                                            sb.append(casewiseRecordForTabFile.get(j + jc));
×
2650
                                            removeJset.add(j + jc);
×
2651
                                        }
2652
                                    }
2653
                                    casewiseRecordForTabFile.set(j - 1, concatenated);
×
2654

2655
                                    //out.println(j-1+"th concatenated="+concatenated);
UNCOV
2656
                                    j = lastJ - 1;
×
2657

2658
                                } // end-of-if: continuous-OBS only
2659

2660
                            } // end of loop-j
2661

2662
                            //out.println("removeJset="+removeJset);
2663

2664
                            // a new list that stores a new case with concatenated string data
UNCOV
2665
                            List<String> newDataLine = new ArrayList<String>();
×
2666

2667
                            for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
×
2668
                                //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");
2669

UNCOV
2670
                                if (!removeJset.contains(jl)) {
×
2671

2672
//                                if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileString)){
2673
//                                    out.println("NA-S jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
2674
//                                } else if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileNumeric)){
2675
//                                    out.println("NA-N jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
2676
//                                } else if (casewiseRecordForTabFile.get(jl)==null){
2677
//                                    out.println("null case jl="+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
2678
//                                } else if (casewiseRecordForTabFile.get(jl).equals("NaN")){
2679
//                                    out.println("NaN jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
2680
//                                } else if (casewiseRecordForTabFile.get(jl).equals("")){
2681
//                                    out.println("blank jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
2682
//                                } else if (casewiseRecordForTabFile.get(jl).equals(" ")){
2683
//                                    out.println("space jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
2684
//                                }
2685

UNCOV
2686
                                    newDataLine.add(casewiseRecordForTabFile.get(jl));
×
2687
                                } else {
2688
//                                out.println("Excluded: jl="+jl+"-th datum=["+casewiseRecordForTabFile.get(jl)+"]");
2689
                                }
2690
                            }  // end of loop-jl
2691

2692
                            //out.println("new casewiseRecordForTabFile="+newDataLine);
2693
                            //out.println("new casewiseRecordForTabFile(size)="+newDataLine.size());
2694

UNCOV
2695
                            casewiseRecordForTabFile = newDataLine;
×
2696

2697
                        } // end-if: stringContinuousVar-exist case
2698

2699
                        // caseIndex starts from 1 not 0
2700
                        caseIndex = (ii * OBS + i + 1) / nOBS;
1✔
2701

2702
                        for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {
1✔
2703

2704
                            dbgLog.fine("k=" + k + "-th variableTypelList=" + variableTypelList.get(k));
1✔
2705

2706
                            if (variableTypelList.get(k) > 0) {
1✔
2707

2708
                                // Strip the String variables off the
2709
                                // whitespace padding:
2710

2711
                                // [ snipped ]
2712

2713
                                // I've removed the block of code above where
2714
                                // String values were substring()-ed to the
2715
                                // length specified in the variable metadata;
2716
                                // Doing that was not enough, since a string
2717
                                // can still be space-padded inside its
2718
                                // advertised capacity. (note that extended
2719
                                // variables can have many kilobytes of such
2720
                                // padding in them!) Plus it was completely
2721
                                // redundant, since we are stripping all the
2722
                                // trailing white spaces with
2723
                                // StringUtils.stripEnd() below:
2724

2725

2726
                                String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
1✔
2727
                                // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
2728
                                if (paddRemoved.equals("")) {
1✔
UNCOV
2729
                                    paddRemoved = " ";
×
2730
                                }
2731
                                //casewiseRecordForTabFile.set(k, "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");
2732
                                casewiseRecordForTabFile.set(k, escapeCharacterString(paddRemoved));
1✔
2733

2734
                            // end of String var case
2735

2736
                            } // end of variable-type check
2737

2738
                            if (casewiseRecordForTabFile.get(k) != null && !casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {
1✔
2739
                                
2740
                                String variableFormatType = variableFormatTypeList[k];
1✔
2741
                                dbgLog.finer("k=" + k + "th printFormatTable format=" + printFormatTable.get(variableNameList.get(k)));
1✔
2742

2743
                                int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);
1✔
2744
                                
2745

2746
                                if (variableFormatType.equals("date")) {
1✔
UNCOV
2747
                                    dbgLog.finer("date case");
×
2748

2749
                                    long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L - SPSS_DATE_OFFSET;
×
2750

2751
                                    String newDatum = sdf_ymd.format(new Date(dateDatum));
×
2752
                                    dbgLog.finer("k=" + k + ":" + newDatum);
×
2753
                                    /* saving date format */
UNCOV
2754
                                    dbgLog.finer("saving dateFormat[k] = " + sdf_ymd.toPattern());
×
UNCOV
2755
                                    casewiseRecordForTabFile.set(k, newDatum);
×
2756
                                    dateFormatList[k] = sdf_ymd.toPattern();
×
2757
                                //formatCategoryTable.put(variableNameList.get(k), "date");
2758
                                } else if (variableFormatType.equals("time")) {
1✔
2759
                                    dbgLog.finer("time case:DTIME or DATETIME or TIME");
×
2760
                                    //formatCategoryTable.put(variableNameList.get(k), "time");
2761

UNCOV
2762
                                    if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {
×
2763
                                        // We're not even going to try to handle "DTIME"
2764
                                        // values as time/dates in dataverse; this is a weird
2765
                                        // format that nobody uses outside of SPSS.
2766
                                        // (but we do need to remember to treat the resulting values 
2767
                                        // as character strings, not numerics!)
2768
                                        
2769
                                        if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
×
2770
                                            long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L - SPSS_DATE_BIAS;
×
2771
                                            String newDatum = sdf_dhms.format(new Date(dateDatum));
×
UNCOV
2772
                                            dbgLog.finer("k=" + k + ":" + newDatum);
×
2773
                                            casewiseRecordForTabFile.set(k, newDatum);
×
UNCOV
2774
                                        } else {
×
2775
                                            // decimal point included
2776
                                            String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");
×
2777

2778
                                            dbgLog.finer(StringUtils.join(timeData, "|"));
×
2779
                                            long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
×
UNCOV
2780
                                            StringBuilder sb_time = new StringBuilder(
×
2781
                                                    sdf_dhms.format(new Date(dateDatum)));
×
2782
                                            dbgLog.finer(sb_time.toString());
×
2783

UNCOV
2784
                                            if (formatDecimalPointPosition > 0) {
×
2785
                                                sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
×
2786
                                            }
2787

2788
                                            dbgLog.finer("k=" + k + ":" + sb_time.toString());
×
UNCOV
2789
                                            casewiseRecordForTabFile.set(k, sb_time.toString());
×
UNCOV
2790
                                        }
×
UNCOV
2791
                                    } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {
×
2792
                                        // TODO: 
2793
                                        // (for both datetime and "dateless" time)
2794
                                        // keep the longest of the matching formats - i.e., if there are *some*
2795
                                        // values in the vector that have thousandths of a second, that should be
2796
                                        // part of the saved format!
2797
                                        //  -- L.A. Aug. 12 2014 
2798
                                        if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
×
2799
                                            long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L - SPSS_DATE_OFFSET;
×
2800
                                            String newDatum = sdf_ymdhms.format(new Date(dateDatum));
×
2801
                                            dbgLog.finer("k=" + k + ":" + newDatum);
×
UNCOV
2802
                                            casewiseRecordForTabFile.set(k, newDatum);
×
2803
                                            dateFormatList[k] = sdf_ymdhms.toPattern();
×
UNCOV
2804
                                        } else {
×
2805
                                            // decimal point included
2806
                                            String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");
×
2807

2808
                                            //dbgLog.finer(StringUtils.join(timeData, "|"));
UNCOV
2809
                                            long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
×
UNCOV
2810
                                            StringBuilder sb_time = new StringBuilder(
×
2811
                                                    sdf_ymdhms.format(new Date(dateDatum)));
×
2812
                                            //dbgLog.finer(sb_time.toString());
2813

2814
                                            if (formatDecimalPointPosition > 0) {
×
2815
                                                sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
×
2816
                                            }
2817
                                            dbgLog.finer("k=" + k + ":" + sb_time.toString());
×
2818
                                            casewiseRecordForTabFile.set(k, sb_time.toString());
×
UNCOV
2819
                                            dateFormatList[k] = sdf_ymdhms.toPattern() + (formatDecimalPointPosition > 0 ? ".S" : "" );
×
UNCOV
2820
                                        }
×
2821
                                    } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
×
2822
                                        // TODO: 
2823
                                        // double-check that we are handling "dateless" time correctly... -- L.A. Aug. 2014
2824
                                        if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
×
2825
                                            long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L;
×
2826
                                            String newDatum = sdf_hms.format(new Date(dateDatum));
×
2827
                                            dbgLog.finer("k=" + k + ":" + newDatum);
×
UNCOV
2828
                                            casewiseRecordForTabFile.set(k, newDatum);
×
2829
                                            if (dateFormatList[k] == null) {
×
UNCOV
2830
                                                dateFormatList[k] = sdf_hms.toPattern();
×
2831
                                            }
UNCOV
2832
                                        } else {
×
2833
                                            // decimal point included
2834
                                            String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");
×
2835

2836
                                            //dbgLog.finer(StringUtils.join(timeData, "|"));
UNCOV
2837
                                            long dateDatum = Long.parseLong(timeData[0]) * 1000L;
×
UNCOV
2838
                                            StringBuilder sb_time = new StringBuilder(
×
2839
                                                    sdf_hms.format(new Date(dateDatum)));
×
2840
                                            //dbgLog.finer(sb_time.toString());
2841

2842
                                            if (formatDecimalPointPosition > 0) {
×
2843
                                                sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
×
2844
                                            }
2845
                                            dbgLog.finer("k=" + k + ":" + sb_time.toString());
×
2846
                                            casewiseRecordForTabFile.set(k, sb_time.toString());
×
2847
                                            
UNCOV
2848
                                            String format_hmsS = sdf_hms.toPattern() + (formatDecimalPointPosition > 0 ? ".S" : "");
×
2849
                                            if (dateFormatList[k] == null || (format_hmsS.length() > dateFormatList[k].length())) {
×
UNCOV
2850
                                                dateFormatList[k] = format_hmsS;
×
2851
                                            }
UNCOV
2852
                                        }
×
2853
                                    }
2854
                                    
2855
                                } else if (variableFormatType.equals("other")) {
1✔
2856
                                    dbgLog.finer("other non-date/time case:=" + i);
1✔
2857

2858
                                    if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
1✔
2859
                                        // day of week
2860
                                        dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
×
2861
                                        dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST.get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
×
UNCOV
2862
                                        String newDatum = SPSSConstants.WEEKDAY_LIST.get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
×
UNCOV
2863
                                        casewiseRecordForTabFile.set(k, newDatum);
×
2864
                                        dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
×
2865
                                    } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
1✔
2866
                                        // month
2867
                                        dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
×
2868
                                        dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST.get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
×
UNCOV
2869
                                        String newDatum = SPSSConstants.MONTH_LIST.get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
×
UNCOV
2870
                                        casewiseRecordForTabFile.set(k, newDatum);
×
UNCOV
2871
                                        dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));
×
2872
                                    }
2873
                                }
2874
                                
2875
                                
2876
                            } // end: date-time-datum check
2877

2878

2879
                        } // end: loop-k(2nd: variable-wise-check)
2880

2881

2882
                        // write to tab file
2883
                        if (casewiseRecordForTabFile.size() > 0) {
1✔
2884
                            pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
1✔
2885
                        }
2886

2887
                        // numeric contents-check
2888
                        for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
1✔
2889
                            if (variableFormatTypeList[l].equals("date")
1✔
2890
                                    || variableFormatTypeList[l].equals("time")
1✔
2891
                                    || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
1✔
2892
                                    || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {
1✔
2893
                                // TODO: 
2894
                                // figure out if any special handling is still needed here in 4.0. 
2895
                                // -- L.A. - Aug. 2014
2896

2897
                            } else {
2898
                                if (variableTypelList.get(l) <= 0) {
1✔
2899
                                    if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
1✔
UNCOV
2900
                                        decimalVariableSet.add(l);
×
2901
                                    }
2902
                                }
2903
                            }
2904
                        }
2905

2906
                        // reset the case-wise working objects
2907
                        casewiseRecordForTabFile.clear();
1✔
2908

2909
                        if ( caseQnty > 0 ) {
1✔
2910
                            if ( caseIndex == caseQnty ) {
1✔
2911
                                hasReachedEOF = true; 
1✔
2912
                            }
2913
                        }
2914

2915
                         if (hasReachedEOF){
1✔
2916
                            break;
1✔
2917
                        }
2918

2919
                    } // if(The-end-of-a-case(row)-processing)
2920

2921
                } // loop-i (OBS unit)
2922

2923
                if ((hasReachedEOF) || (stream.available() == 0)) {
1✔
2924
                    // reached the end of this file
2925
                    // do exit-processing
2926

2927
                    dbgLog.fine("***** reached the end of the file at " + ii + "th iteration *****");
1✔
2928

2929
                    break OBSERVATION;
1✔
2930
                }
2931
                
2932
                ii++;
1✔
2933

2934
            } // while loop
1✔
2935

2936
            pwout.close();
1✔
UNCOV
2937
        } catch (IOException ex) {
×
UNCOV
2938
            throw ex;
×
2939
        }
1✔
2940
        
2941
        
2942
        dbgLog.fine("<<<<<<");
1✔
2943
        dbgLog.fine("formatCategoryTable = " + formatCategoryTable);
1✔
2944
        dbgLog.fine(">>>>>>");
1✔
2945

2946

2947
        dbgLog.fine("decimalVariableSet=" + decimalVariableSet);
1✔
2948

2949
        dbgLog.fine("decodeRecordTypeDataCompressed(): end");
1✔
2950
    }
1✔
2951

2952

2953
    void decodeRecordTypeDataUnCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException {
2954
        dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****");
×
2955

UNCOV
2956
        if (stream ==null){
×
2957
            throw new IllegalArgumentException("decodeRecordTypeDataUnCompressed: stream == null!");
×
2958
        }
2959

UNCOV
2960
        int varQnty = dataTable.getVarQuantity().intValue();
×
2961
        
2962

2963
        // 
2964
        // set-up tab file
2965
        
2966
        PrintWriter pwout = createOutputWriter ( stream ); 
×
2967
        
2968
        boolean hasStringVarContinuousBlock = 
2969
            obsNonVariableBlockSet.size() > 0 ? true : false;
×
UNCOV
2970
        dbgLog.fine("hasStringVarContinuousBlock="+hasStringVarContinuousBlock);
×
2971
        
2972
        int ii = 0;
×
2973
        
2974
        int OBS = LENGTH_SAV_OBS_BLOCK;
×
UNCOV
2975
        int nOBS = OBSUnitsPerCase;
×
2976
        
UNCOV
2977
        dbgLog.fine("OBSUnitsPerCase="+OBSUnitsPerCase);
×
2978
        
UNCOV
2979
        int caseIndex = 0;
×
2980
        
2981
        dbgLog.fine("printFormatTable:\n"+printFormatTable);
×
2982

2983
        variableFormatTypeList = new String[varQnty];
×
2984
        dateFormatList = new String[varQnty];
×
2985

2986
        for (int i = 0; i < varQnty; i++){
×
2987
            variableFormatTypeList[i]=SPSSConstants.FORMAT_CATEGORY_TABLE.get(
×
UNCOV
2988
                                                                              printFormatTable.get(variableNameList.get(i)));
×
2989
            dbgLog.fine("i="+i+"th variableFormatTypeList="+variableFormatTypeList[i]);
×
2990
            formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
×
2991
        }
2992
        dbgLog.fine("variableFormatType:\n"+Arrays.deepToString(variableFormatTypeList));
×
UNCOV
2993
        dbgLog.fine("formatCategoryTable:\n"+formatCategoryTable);
×
2994

UNCOV
2995
        int numberOfDecimalVariables = 0;
×
2996
        
2997
        // TODO: 
2998
        // Make sure the date formats are actually preserved! 
2999
        // (this is something that was collected in the code below and passed
3000
        // to the UNF calculator). 
3001
        // -- L.A. 4.0 alpha
3002
        
UNCOV
3003
        List<String> casewiseRecordForTabFile = new ArrayList<String>();
×
3004
        
3005
        
3006
        // missing values are written to the tab-delimited file by
3007
        // using the default or user-specified missing-value  strings;
3008
        // however, to calculate UNF/summary statistics,
3009
        // classes for these calculations require their specific 
3010
        // missing values that differ from the above missing-value
3011
        // strings; therefore, after row data for the tab-delimited 
3012
        // file are written, missing values in a row are changed to
3013
        // UNF/summary-statistics-OK ones.
3014

3015
        // data-storage object for sumStat
3016
        ///dataTable2 = new Object[varQnty][caseQnty];
3017
        // storage of date formats to pass to UNF        
3018
        ///dateFormats = new String[varQnty][caseQnty];
3019
        
3020
        // write the variable header out, if instructed to do so
UNCOV
3021
        if (storeWithVariableHeader) {
×
UNCOV
3022
            pwout.println(generateVariableHeader(dataTable.getDataVariables()));
×
3023
        }
3024

3025
        try {
UNCOV
3026
            for (int i = 0; ; i++){  // case-wise loop
×
3027
                
UNCOV
3028
                byte[] buffer = new byte[OBS*nOBS];
×
3029
                
UNCOV
3030
                int nbytesuc =  stream.read(buffer);
×
3031
                
3032
                StringBuilder sb_stringStorage = new StringBuilder("");
×
3033

UNCOV
3034
                for (int k=0; k < nOBS; k++){
×
UNCOV
3035
                    int offset= OBS*k;
×
3036

3037
                    // uncompressed case
3038
                    // numeric missing value == sysmis
3039
                    // FF FF FF FF FF FF eF FF(little endian)
3040
                    // string missing value
3041
                    // 20 20 20 20 20 20 20 20
3042
                    // cf: compressed case 
3043
                    // numeric type:sysmis == 0xFF
3044
                    // string type: missing value == 0xFE
3045
                    // 
3046

3047
                    boolean isNumeric = OBSwiseTypelList.get(k)==0 ? true : false;
×
3048
                    
3049
                    if (isNumeric){
×
UNCOV
3050
                        dbgLog.finer(k+"-th variable is numeric");
×
3051
                        // interpret as double
3052
                        ByteBuffer bb_double = ByteBuffer.wrap(
×
3053
                            buffer, offset , LENGTH_SAV_OBS_BLOCK);
UNCOV
3054
                        if (isLittleEndian){
×
3055
                            bb_double.order(ByteOrder.LITTLE_ENDIAN);
×
3056
                        }
3057
                        //char[] hexpattern =
3058
                        String dphex = new String(Hex.encodeHex(
×
UNCOV
3059
                                Arrays.copyOfRange(bb_double.array(),
×
3060
                                offset, offset+LENGTH_SAV_OBS_BLOCK)));
3061
                        dbgLog.finer("dphex="+ dphex);
×
3062
                            
UNCOV
3063
                        if ((dphex.equals("ffffffffffffefff"))||
×
3064
                            (dphex.equals("ffefffffffffffff"))){
×
3065
                            //casewiseRecordForTabFile.add(systemMissingValue);
3066
                            // add the numeric missing value
3067
                            dbgLog.fine("SAV Reader: adding: Missing Value (numeric)");
×
3068
                            casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);
×
3069
                        } else {
UNCOV
3070
                            Double ddatum  = bb_double.getDouble();
×
3071
                            dbgLog.fine("SAV Reader: adding: ddatum="+ddatum);
×
3072

3073
                            // add this non-missing-value numeric datum
3074
                            casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum)) ;
×
3075
                        }
3076
                    
UNCOV
3077
                    } else {
×
UNCOV
3078
                        dbgLog.finer(k+"-th variable is string");
×
3079
                        // string case
3080
                        // strip space-padding
3081
                        // do not trim: string might have spaces within it
3082
                        // the missing value (hex) for a string variable is:
3083
                        // "20 20 20 20 20 20 20 20"
3084
                        
3085
                        
3086
                        String strdatum = new String(
×
UNCOV
3087
                            Arrays.copyOfRange(buffer,
×
3088
                            offset, (offset+LENGTH_SAV_OBS_BLOCK)),defaultCharSet);
UNCOV
3089
                        dbgLog.finer("str_datum="+strdatum);
×
3090
                        // add this non-missing-value string datum 
UNCOV
3091
                        casewiseRecordForTabFile.add(strdatum);
×
3092

3093
                    } // if isNumeric
3094
                
3095
                } // k-loop
3096

3097
                // String-variable's continuous block exits:
UNCOV
3098
                if (hasStringVarContinuousBlock){
×
3099
                    // continuous blocks: string case
3100
                    // concatenating process
3101
                    //dbgLog.fine("concatenating process starts");
3102

3103
                    //dbgLog.fine("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
3104
                    //dbgLog.fine("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());
3105

3106
                    StringBuilder sb = new StringBuilder("");
×
3107
                    int firstPosition = 0;
×
3108

3109
                    Set<Integer> removeJset = new HashSet<Integer>();
×
UNCOV
3110
                    for (int j=0; j< nOBS; j++){
×
UNCOV
3111
                        dbgLog.finer("j="+j+"-th type ="+OBSwiseTypelList.get(j));
×
3112
                        if (OBSwiseTypelList.get(j) == -1){
×
3113
                            // String continued fount at j-th 
3114
                            // look back the j-1 
UNCOV
3115
                            firstPosition = j-1;
×
3116
                            int lastJ = j;
×
3117
                            String concatanated = null;
×
3118

3119
                            removeJset.add(j);
×
3120
                            sb.append(casewiseRecordForTabFile.get(j-1));
×
UNCOV
3121
                            sb.append(casewiseRecordForTabFile.get(j));
×
3122
                            for (int jc =1; ; jc++ ){
×
3123
                                if (OBSwiseTypelList.get(j+jc) != -1){
×
3124
                                // j is the end unit of this string variable
3125
                                    concatanated = sb.toString();
×
UNCOV
3126
                                    sb.setLength(0);
×
3127
                                   lastJ = j+jc;
×
3128
                                   break;
×
3129
                                } else {
UNCOV
3130
                                    sb.append(casewiseRecordForTabFile.get(j+jc));
×
3131
                                    removeJset.add(j+jc);
×
3132
                                }
3133
                            }
3134
                            casewiseRecordForTabFile.set(j-1, concatanated); 
×
3135

3136
                            //out.println(j-1+"th concatanated="+concatanated);
UNCOV
3137
                            j = lastJ -1; 
×
3138

3139
                        } // end-of-if: continuous-OBS only
3140
                    } // end of loop-j
3141

UNCOV
3142
                    List<String> newDataLine = new ArrayList<String>();
×
3143
                    
3144
                    for (int jl=0; jl<casewiseRecordForTabFile.size();jl++){
×
3145
                        //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");
3146
                        
UNCOV
3147
                        if (!removeJset.contains(jl) ){
×
UNCOV
3148
                            newDataLine.add(casewiseRecordForTabFile.get(jl));
×
3149
                        } 
3150
                    }
3151

3152
                    dbgLog.fine("new casewiseRecordForTabFile="+newDataLine);
×
UNCOV
3153
                    dbgLog.fine("new casewiseRecordForTabFile(size)="+newDataLine.size());
×
3154
                    
UNCOV
3155
                    casewiseRecordForTabFile = newDataLine;
×
3156

3157
                } // end-if: stringContinuousVar-exist case
3158

UNCOV
3159
                caseIndex++;
×
3160
                dbgLog.finer("caseIndex="+caseIndex);
×
UNCOV
3161
                for (int k = 0; k < casewiseRecordForTabFile.size(); k++){
×
3162

UNCOV
3163
                    if (variableTypelList.get(k) > 0) {
×
3164

3165
                        // See my comments for this padding removal logic
3166
                        // in the "compressed" method -- L.A.
3167

3168
                        String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
×
3169
                        // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
UNCOV
3170
                        if (paddRemoved.equals("")) {
×
UNCOV
3171
                            paddRemoved = " ";
×
3172
                        }
3173

3174
                        //casewiseRecordForTabFile.set(k, "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");
UNCOV
3175
                        casewiseRecordForTabFile.set(k, escapeCharacterString(paddRemoved));
×
3176
                        
3177
                        // end of String var case
3178

3179
                    } // end of variable-type check
3180
                    
3181
                    if (casewiseRecordForTabFile.get(k)!=null && !casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)){
×
3182
                        
3183
                        // to do date conversion
3184
                        String variableFormatType =  variableFormatTypeList[k];
×
UNCOV
3185
                        dbgLog.finer("k="+k+"th variable format="+variableFormatType);
×
3186

3187
                        int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);
×
3188

3189
                        if (variableFormatType.equals("date")){
×
UNCOV
3190
                            dbgLog.finer("date case");
×
3191

3192
                            long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())*1000L- SPSS_DATE_OFFSET;
×
3193

3194
                            String newDatum = sdf_ymd.format(new Date(dateDatum));
×
3195
                            dbgLog.finer("k="+k+":"+newDatum);
×
3196

3197
                            casewiseRecordForTabFile.set(k, newDatum);
×
UNCOV
3198
                            dateFormatList[k] = sdf_ymd.toPattern();
×
UNCOV
3199
                        } else if (variableFormatType.equals("time")) {
×
UNCOV
3200
                            dbgLog.finer("time case:DTIME or DATETIME or TIME");
×
3201
                            //formatCategoryTable.put(variableNameList.get(k), "time");
3202
                            // not treating DTIME as date/time; see comment elsewhere in 
3203
                            // the code; 
3204
                            // (but we do need to remember to treat the resulting values 
3205
                            // as character strings, not numerics!)
3206
                            
3207
                            if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")){
×
3208

UNCOV
3209
                                if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0){
×
3210
                                    long dateDatum  = Long.parseLong(casewiseRecordForTabFile.get(k).toString())*1000L - SPSS_DATE_BIAS;
×
3211
                                    String newDatum = sdf_dhms.format(new Date(dateDatum));
×
3212
                                    // Note: DTIME is not a complete date, so we don't save a date format with it
UNCOV
3213
                                    dbgLog.finer("k="+k+":"+newDatum);
×
3214
                                    casewiseRecordForTabFile.set(k, newDatum);
×
UNCOV
3215
                                } else {
×
3216
                                    // decimal point included
3217
                                    String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");
×
3218

3219
                                    dbgLog.finer(StringUtils.join(timeData, "|"));
×
UNCOV
3220
                                    long dateDatum = Long.parseLong(timeData[0])*1000L - SPSS_DATE_BIAS;
×
3221
                                    StringBuilder sb_time = new StringBuilder(
×
3222
                                        sdf_dhms.format(new Date(dateDatum)));
×
3223
                                    
UNCOV
3224
                                    if (formatDecimalPointPosition > 0){
×
UNCOV
3225
                                        sb_time.append("."+timeData[1].substring(0,formatDecimalPointPosition));
×
3226
                                    }
3227
                                    
3228
                                    
3229
                                    dbgLog.finer("k="+k+":"+sb_time.toString());
×
UNCOV
3230
                                    casewiseRecordForTabFile.set(k, sb_time.toString());
×
UNCOV
3231
                                }
×
UNCOV
3232
                            } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")){
×
3233
                                // TODO: 
3234
                                // (for both datetime and "dateless" time)
3235
                                // keep the longest of the matching formats - i.e., if there are *some*
3236
                                // values in the vector that have thousands of a second, that should be 
3237
                                // part of the saved format!
3238
                                //  -- L.A. Aug. 12 2014 
3239

3240
                                if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0){
×
3241
                                    long dateDatum  = Long.parseLong(casewiseRecordForTabFile.get(k).toString())*1000L - SPSS_DATE_OFFSET;
×
3242
                                    String newDatum = sdf_ymdhms.format(new Date(dateDatum));
×
3243
                                    dbgLog.finer("k="+k+":"+newDatum);
×
UNCOV
3244
                                    casewiseRecordForTabFile.set(k, newDatum);
×
3245
                                    dateFormatList[k] = sdf_ymdhms.toPattern();
×
UNCOV
3246
                                } else {
×
3247
                                    // decimal point included
3248
                                    String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");
×
3249

3250
                                    //dbgLog.finer(StringUtils.join(timeData, "|"));
UNCOV
3251
                                    long dateDatum = Long.parseLong(timeData[0])*1000L- SPSS_DATE_OFFSET;
×
UNCOV
3252
                                    StringBuilder sb_time = new StringBuilder(
×
3253
                                        sdf_ymdhms.format(new Date(dateDatum)));
×
3254
                                    //dbgLog.finer(sb_time.toString());
3255
                                    
3256
                                    if (formatDecimalPointPosition > 0){
×
3257
                                        sb_time.append("."+timeData[1].substring(0,formatDecimalPointPosition));
×
3258
                                    }
3259
                                    dbgLog.finer("k="+k+":"+sb_time.toString());
×
3260
                                    casewiseRecordForTabFile.set(k, sb_time.toString());
×
3261
                                    // datetime with milliseconds:
3262
                                    dateFormatList[k] = sdf_ymdhms.toPattern() + (formatDecimalPointPosition > 0 ? ".S" : "" );
×
3263
                                }
×
3264
                            } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")){
×
3265
                                if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0){
×
3266
                                    long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())*1000L;
×
3267
                                    String newDatum = sdf_hms.format(new Date(dateDatum));
×
3268
                                    dbgLog.finer("k="+k+":"+newDatum);
×
UNCOV
3269
                                    casewiseRecordForTabFile.set(k, newDatum);
×
3270
                                    if (dateFormatList[k] == null) {
×
UNCOV
3271
                                        dateFormatList[k] = sdf_hms.toPattern();
×
3272
                                    }
UNCOV
3273
                                } else {
×
3274
                                    // decimal point included
3275
                                    String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");
×
3276

3277
                                    //dbgLog.finer(StringUtils.join(timeData, "|"));
UNCOV
3278
                                    long dateDatum = Long.parseLong(timeData[0])*1000L;
×
UNCOV
3279
                                    StringBuilder sb_time = new StringBuilder(
×
3280
                                        sdf_hms.format(new Date(dateDatum)));
×
3281
                                    //dbgLog.finer(sb_time.toString());
3282
                                    
3283
                                    if (formatDecimalPointPosition > 0){
×
3284
                                        sb_time.append("."+timeData[1].substring(0,formatDecimalPointPosition));
×
3285
                                    }
3286
                                    dbgLog.finer("k="+k+":"+sb_time.toString());
×
3287
                                    casewiseRecordForTabFile.set(k, sb_time.toString());
×
3288
                                    // time, possibly with milliseconds:
UNCOV
3289
                                    String format_hmsS = sdf_hms.toPattern() + (formatDecimalPointPosition > 0 ? ".S" : "" );
×
3290
                                    if (dateFormatList[k] == null || (format_hmsS.length() > dateFormatList[k].length())) {
×
UNCOV
3291
                                        dateFormatList[k] = format_hmsS;
×
3292
                                    }
3293
                                }
×
3294
                            }
3295
                        } else if (variableFormatType.equals("other")){
×
UNCOV
3296
                            dbgLog.finer("other non-date/time case");
×
3297

3298
                            if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")){
×
3299
                                // day of week
3300
                                dbgLog.finer("data k="+k+":"+casewiseRecordForTabFile.get(k));
×
3301
                                dbgLog.finer("data k="+k+":"+SPSSConstants.WEEKDAY_LIST.get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString())-1));
×
3302
                                String newDatum = SPSSConstants.WEEKDAY_LIST.get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString())-1);
×
UNCOV
3303
                                casewiseRecordForTabFile.set(k, newDatum);
×
3304
                                dbgLog.finer("wkday:k="+k+":"+casewiseRecordForTabFile.get(k));
×
3305
                            } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")){
×
3306
                                // month
3307
                                dbgLog.finer("data k="+k+":"+casewiseRecordForTabFile.get(k));
×
3308
                                dbgLog.finer("data k="+k+":"+SPSSConstants.MONTH_LIST.get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString())-1));
×
UNCOV
3309
                                String newDatum = SPSSConstants.MONTH_LIST.get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString())-1);
×
UNCOV
3310
                                casewiseRecordForTabFile.set(k, newDatum);
×
UNCOV
3311
                                dbgLog.finer("month:k="+k+":"+casewiseRecordForTabFile.get(k));
×
3312

3313
                            }
3314
                        } 
3315
                        // end of date/time block
3316
                    } // end: date-time-datum check
3317

3318
                } // end: loop-k(2nd: variablte-wise-check)
3319

3320
                // write to tab file
UNCOV
3321
                if (casewiseRecordForTabFile.size() > 0) {
×
UNCOV
3322
                    pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
×
3323
                }
3324
                
3325
                // numeric contents-check
3326
                for (int l = 0; l < casewiseRecordForTabFile.size(); l++){
×
3327
                    if ( variableFormatTypeList[l].equals("date") ||
×
UNCOV
3328
                         variableFormatTypeList[l].equals("time") ||
×
UNCOV
3329
                         printFormatTable.get(variableNameList.get(l)).equals("WKDAY") ||
×
3330
                         printFormatTable.get(variableNameList.get(l)).equals("MONTH") ) {
×
3331
                        
3332
                    } else { 
UNCOV
3333
                        if (variableTypelList.get(l) <= 0) {
×
UNCOV
3334
                            if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0){
×
UNCOV
3335
                                decimalVariableSet.add(l);
×
3336
                            }
3337
                        }
3338
                    }
3339
                }
3340
                
3341
                // reset the case-wise working objects
UNCOV
3342
                casewiseRecordForTabFile.clear();
×
3343
                
UNCOV
3344
                if (stream.available() == 0){
×
3345
                    // reached the end of this file
3346
                    // do exit-processing
3347

3348
                    dbgLog.fine("reached the end of the file at "+ii
×
3349
                                +"th iteration");
3350

UNCOV
3351
                    break;
×
3352
                } // if eof processing
3353
            } //i-loop: case(row) iteration
3354

3355
            // close the writer
3356
            pwout.close();
×
3357
            
3358

UNCOV
3359
        } catch (IOException ex) {
×
UNCOV
3360
            throw ex; 
×
3361
        }
×
3362
        
3363
        // contents check
3364
        dbgLog.fine("numberOfDecimalVariables="+numberOfDecimalVariables);
×
3365
        dbgLog.fine("decimalVariableSet="+decimalVariableSet);
×
3366

UNCOV
3367
        dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): end *****");
×
UNCOV
3368
    }
×
3369

3370
    // Utility Methods  -----------------------------------------------------//
3371

3372
    private boolean variableNameIsAnIncrement (String varNameBase, String variableName){
UNCOV
3373
        if ( varNameBase == null ) {
×
3374
            return false; 
×
3375
        }
3376

UNCOV
3377
        if ( varNameBase.concat("0").equals(variableName) ) {
×
3378
            return true; 
×
3379
        } 
3380
        
UNCOV
3381
        return false; 
×
3382
    }
3383

3384
    private boolean variableNameIsAnIncrement (String varNameBase, String lastExtendedVariable, String currentVariable) {
3385

3386
        if ( varNameBase == null ||
×
3387
             lastExtendedVariable == null || 
3388
             currentVariable == null ) {
3389
            return false; 
×
3390
        }
3391

UNCOV
3392
        if ( varNameBase.length() >= lastExtendedVariable.length() ) {
×
3393
            return false; 
×
3394
        }
3395

UNCOV
3396
        if ( varNameBase.length() >= currentVariable.length() ) {
×
3397
            return false; 
×
3398
        }
3399

UNCOV
3400
        if ( !(varNameBase.equals(currentVariable.substring(0,varNameBase.length()))) ) {
×
3401
            return false; 
×
3402
        }
3403

3404
        String lastSuffix = lastExtendedVariable.substring(varNameBase.length()); 
×
3405
        String currentSuffix = currentVariable.substring(varNameBase.length()); 
×
3406

UNCOV
3407
        if ( currentSuffix.length() > 2 ) {
×
UNCOV
3408
            return false; 
×
3409
        }
3410

3411
        //if ( !currentSuffix.matches("^[0-9A-Z]*$") ) {
3412
        //    return false; 
3413
        //}
3414

UNCOV
3415
        return suffixIsAnIncrement (lastSuffix, currentSuffix); 
×
3416
    }
3417
        
3418

3419
    private boolean suffixIsAnIncrement ( String lastSuffix, String currentSuffix ) {
3420
        // Extended variable suffixes are base-36 number strings in the 
3421
        // [0-9A-Z] alphabet. I.e. the incremental suffixes go from 
3422
        // 0 to 9 to A to Z to 10 to 1Z ... etc. 
3423

3424
        int lastSuffixValue = intBase36 ( lastSuffix ); 
×
3425
        int currentSuffixValue = intBase36 ( currentSuffix ); 
×
3426

UNCOV
3427
        if ( currentSuffixValue - lastSuffixValue > 0 ) {
×
3428
            return true; 
×
3429
        }
3430

UNCOV
3431
        return false; 
×
3432
    }
3433
        
3434
    private int intBase36 ( String stringBase36 ) {
3435

3436
        // integer value of a base-36 string in [0-9A-Z] alphabet;
3437
        // i.e. "0" = 0, "9" = 9, "A" = 10, 
3438
        // "Z"  = 35, "10" = 36, "1Z" = 71 ...
3439
        
UNCOV
3440
        byte[] stringBytes = stringBase36.getBytes(); 
×
3441

3442
        int ret = 0; 
×
3443

UNCOV
3444
        for ( int i = 0; i < stringBytes.length; i++ ) {
×
3445
            int value = 0; 
×
3446
            if (stringBytes[i] >= 48 && stringBytes[i] <= 57 ) {
×
3447
                // [0-9]
3448
                value = (int)stringBytes[i] - 48; 
×
UNCOV
3449
            } else if (stringBytes[i] >= 65 && stringBytes[i] <= 90 ) {
×
3450
                // [A-Z] 
3451
                value = (int)stringBytes[i] - 55; 
×
3452
            }
3453

3454
            ret = (ret * 36) + value;
×
3455
        }
3456

UNCOV
3457
        return ret; 
×
3458
    }
3459

3460

3461
    private int getSAVintAdjustedBlockLength(int rawLength){
3462
        int adjustedLength = rawLength;
1✔
3463
        if ((rawLength%LENGTH_SAV_INT_BLOCK ) != 0){
1✔
3464
            adjustedLength = 
1✔
3465
                LENGTH_SAV_INT_BLOCK*(rawLength/LENGTH_SAV_INT_BLOCK +1) ;
3466
        }
3467
        return adjustedLength;
1✔
3468
    }
3469
    
3470
    private int getSAVobsAdjustedBlockLength(int rawLength){
3471
        int adjustedLength = rawLength;
1✔
3472
        if ((rawLength%LENGTH_SAV_OBS_BLOCK ) != 0){
1✔
3473
            adjustedLength = 
1✔
3474
                LENGTH_SAV_OBS_BLOCK*(rawLength/LENGTH_SAV_OBS_BLOCK +1) ;
3475
        }
3476
        return adjustedLength;
1✔
3477
    }
3478
    
3479
    
3480
    private int[] parseRT7SubTypefieldHeader(BufferedInputStream stream) throws IOException {
3481
        int length_unit_length = 4;
1✔
3482
        int length_number_of_units = 4;
1✔
3483
        int storage_size = length_unit_length + length_number_of_units;
1✔
3484
        
3485
        int[] headerSection = new int[2];
1✔
3486
        
3487
        byte[] byteStorage = new byte[storage_size];
1✔
3488

3489
        try {
3490
            int nbytes = stream.read(byteStorage);
1✔
3491
            // to-do check against nbytes
3492

3493
            //printHexDump(byteStorage, "RT7:storage");
3494
            
3495
            ByteBuffer bb_data_type = ByteBuffer.wrap(byteStorage,
1✔
3496
                                                      0, length_unit_length);
3497
            if (isLittleEndian){
1✔
3498
                bb_data_type.order(ByteOrder.LITTLE_ENDIAN);
1✔
3499
            }
3500

3501
            int unitLength = bb_data_type.getInt();
1✔
3502
            dbgLog.fine("parseRT7 SubTypefield: unitLength="+unitLength);
1✔
3503
            
3504
            ByteBuffer bb_number_of_units = ByteBuffer.wrap(byteStorage,
1✔
3505
                                                            length_unit_length, length_number_of_units);
3506
            if (isLittleEndian){
1✔
3507
                bb_number_of_units.order(ByteOrder.LITTLE_ENDIAN);
1✔
3508
            }
3509

3510
            int numberOfUnits = bb_number_of_units.getInt();
1✔
3511
            dbgLog.fine("parseRT7 SubTypefield: numberOfUnits="+numberOfUnits);
1✔
3512
        
3513
            headerSection[0] = unitLength;
1✔
3514
            headerSection[1] = numberOfUnits;
1✔
3515
            return headerSection;
1✔
UNCOV
3516
        } catch (IOException ex) {
×
UNCOV
3517
            throw ex;
×
3518
        }
3519
    }
3520
    
3521
    // TODO: 
3522
    // rename this method "skipRT7SubTypefield or parseAndSkip... 
3523
    // -- because that's what it really does. We only call it 
3524
    // on RT7 sub-fields that we don't know what to do with.
3525
    // -- L.A. 4.0 beta
3526
    private void parseRT7SubTypefield(BufferedInputStream stream) throws IOException {
3527
        int length_unit_length = 4;
1✔
3528
        int length_number_of_units = 4;
1✔
3529
        int storage_size = length_unit_length + length_number_of_units;
1✔
3530
        
3531
        int[] headerSection = new int[2];
1✔
3532
        
3533
        byte[] byteStorage = new byte[storage_size];
1✔
3534

3535
        try{
3536
            int nbytes = stream.read(byteStorage);
1✔
3537
            // to-do check against nbytes
3538

3539
            //printHexDump(byteStorage, "RT7:storage");
3540

3541
            ByteBuffer bb_data_type = ByteBuffer.wrap(byteStorage,
1✔
3542
                       0, length_unit_length);
3543
            if (isLittleEndian){
1✔
3544
                bb_data_type.order(ByteOrder.LITTLE_ENDIAN);
1✔
3545
            }
3546

3547
            int unitLength = bb_data_type.getInt();
1✔
3548
            dbgLog.fine("parseRT7 SubTypefield: unitLength="+unitLength);
1✔
3549

3550
            ByteBuffer bb_number_of_units = ByteBuffer.wrap(byteStorage,
1✔
3551
                       length_unit_length, length_number_of_units);
3552
            if (isLittleEndian){
1✔
3553
                bb_number_of_units.order(ByteOrder.LITTLE_ENDIAN);
1✔
3554
            }
3555

3556
            int numberOfUnits = bb_number_of_units.getInt();
1✔
3557
            dbgLog.fine("parseRT7 SubTypefield: numberOfUnits="+numberOfUnits);
1✔
3558

3559
            headerSection[0] = unitLength;
1✔
3560
            headerSection[1] = numberOfUnits;
1✔
3561
            
3562
            for (int i=0; i<numberOfUnits; i++){
1✔
3563
                byte[] work = new byte[unitLength];
1✔
3564
                
3565
                int nb = stream.read(work);
1✔
3566
                dbgLog.finer("raw bytes in Hex:"+ new String(Hex.encodeHex(work)));
1✔
3567
                ByteBuffer bb_field = ByteBuffer.wrap(work);
1✔
3568
                if (isLittleEndian){
1✔
3569
                    bb_field.order(ByteOrder.LITTLE_ENDIAN);
1✔
3570
                }
3571
                dbgLog.fine("RT7ST: raw bytes in Hex:"+ new String(Hex.encodeHex(bb_field.array())));
1✔
3572
                if (unitLength==4){
1✔
UNCOV
3573
                    int fieldData = bb_field.getInt();
×
UNCOV
3574
                    dbgLog.fine("RT7ST: "+i+"-th fieldData="+fieldData);
×
UNCOV
3575
                    dbgLog.fine("RT7ST: fieldData in Hex="+Integer.toHexString(fieldData));
×
3576
                } else if (unitLength==8){
1✔
3577
                    double fieldData = bb_field.getDouble();
1✔
3578
                    dbgLog.finer("RT7ST: "+i+"-th fieldData="+fieldData);
1✔
3579
                    dbgLog.finer("RT7ST: fieldData in Hex="+Double.toHexString(fieldData));
1✔
3580
                
3581
                }
3582
                dbgLog.finer("");
1✔
3583
            }
3584
           
UNCOV
3585
        } catch (IOException ex) {
×
3586
            //ex.printStackTrace();
UNCOV
3587
            throw ex; 
×
3588
        }
1✔
3589
        
3590
    }
1✔
3591
    
3592
    private List<byte[]> getRT7SubTypefieldData(BufferedInputStream stream) throws IOException {
3593
        int length_unit_length = 4;
×
3594
        int length_number_of_units = 4;
×
UNCOV
3595
        int storage_size = length_unit_length + length_number_of_units;
×
3596
        List<byte[]> dataList = new ArrayList<byte[]>();
×
UNCOV
3597
        int[] headerSection = new int[2];
×
3598
        
3599
        byte[] byteStorage = new byte[storage_size];
×
3600

3601
        try{
UNCOV
3602
            int nbytes = stream.read(byteStorage);
×
3603
            // to-do check against nbytes
3604

3605
            //printHexDump(byteStorage, "RT7:storage");
3606

3607
            ByteBuffer bb_data_type = ByteBuffer.wrap(byteStorage,
×
3608
                       0, length_unit_length);
UNCOV
3609
            if (isLittleEndian){
×
3610
                bb_data_type.order(ByteOrder.LITTLE_ENDIAN);
×
3611
            }
3612

3613
            int unitLength = bb_data_type.getInt();
×
UNCOV
3614
            dbgLog.fine("parseRT7SubTypefield: unitLength="+unitLength);
×
3615

3616
            ByteBuffer bb_number_of_units = ByteBuffer.wrap(byteStorage,
×
3617
                       length_unit_length, length_number_of_units);
UNCOV
3618
            if (isLittleEndian){
×
3619
                bb_number_of_units.order(ByteOrder.LITTLE_ENDIAN);
×
3620
            }
3621

3622
            int numberOfUnits = bb_number_of_units.getInt();
×
3623
            dbgLog.fine("parseRT7SubTypefield: numberOfUnits="+numberOfUnits);
×
3624

3625
            headerSection[0] = unitLength;
×
UNCOV
3626
            headerSection[1] = numberOfUnits;
×
3627

3628
            for (int i=0; i<numberOfUnits; i++){
×
3629

3630
                byte[] work = new byte[unitLength];
×
UNCOV
3631
                int nb = stream.read(work);
×
UNCOV
3632
                dbgLog.finer(new String(Hex.encodeHex(work)));
×
UNCOV
3633
                dataList.add(work);
×
3634
            }
3635

3636

3637
        } catch (IOException ex) {
×
3638
            //ex.printStackTrace();
UNCOV
3639
            throw ex; 
×
UNCOV
3640
        }
×
UNCOV
3641
        return dataList;
×
3642
    }    
3643
    
3644
    void print2Darray(Object[][] datatable, String title){
UNCOV
3645
        dbgLog.fine(title);
×
3646
        for (int i=0; i< datatable.length; i++){
×
UNCOV
3647
            dbgLog.fine(StringUtils.join(datatable[i], "|"));
×
3648
        }
UNCOV
3649
    }    
×
3650
        
3651
    
3652
}
3653

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc