• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #22002

01 Apr 2024 07:56PM CUT coverage: 20.716% (+0.5%) from 20.173%
#22002

push

github

web-flow
Merge pull request #10453 from IQSS/develop

Merge 6.2 into master

704 of 2679 new or added lines in 152 files covered. (26.28%)

81 existing lines in 49 files now uncovered.

17160 of 82836 relevant lines covered (20.72%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java
1
/*
2
   Copyright (C) 2005-2012, by the President and Fellows of Harvard College.
3

4
   Licensed under the Apache License, Version 2.0 (the "License");
5
   you may not use this file except in compliance with the License.
6
   You may obtain a copy of the License at
7

8
         http://www.apache.org/licenses/LICENSE-2.0
9

10
   Unless required by applicable law or agreed to in writing, software
11
   distributed under the License is distributed on an "AS IS" BASIS,
12
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
   See the License for the specific language governing permissions and
14
   limitations under the License.
15

16
   Dataverse Network - A web application to share, preserve and analyze research data.
17
   Developed at the Institute for Quantitative Social Science, Harvard University.
18
   Version 3.0.
19
*/
20
package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por;
21

22
import java.io.BufferedInputStream;
23
import java.io.BufferedReader;
24
import java.io.BufferedWriter;
25
import java.io.File;
26
import java.io.FileInputStream;
27
import java.io.FileOutputStream;
28
import java.io.IOException;
29
import java.io.InputStreamReader;
30
import java.io.OutputStreamWriter;
31
import java.io.PrintWriter;
32
import java.io.Writer;
33
import java.nio.ByteBuffer;
34

35
import java.text.DecimalFormat;
36
import java.text.NumberFormat;
37
import java.text.SimpleDateFormat;
38
import java.util.ArrayList;
39
import java.util.Arrays;
40
import java.util.Calendar;
41
import java.util.Date;
42
import java.util.GregorianCalendar;
43
import java.util.HashMap;
44
import java.util.HashSet;
45
import java.util.LinkedHashMap;
46
import java.util.List;
47
import java.util.Map;
48
import java.util.Scanner;
49
import java.util.Set;
50
import java.util.TimeZone;
51
import java.util.logging.Logger;
52
import java.math.BigDecimal;
53
import java.math.MathContext;
54
import java.math.RoundingMode;
55
import java.util.regex.Matcher;
56
import java.util.regex.Pattern;
57

58
import org.apache.commons.codec.binary.Hex;
59
import org.apache.commons.lang3.ArrayUtils;
60
import org.apache.commons.lang3.StringUtils;
61

62
import edu.harvard.iq.dataverse.DataTable;
63
import edu.harvard.iq.dataverse.datavariable.DataVariable;
64
import edu.harvard.iq.dataverse.datavariable.VariableCategory;
65

66
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader;
67
import edu.harvard.iq.dataverse.ingest.tabulardata.spi.TabularDataFileReaderSpi;
68
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest;
69
import edu.harvard.iq.dataverse.ingest.tabulardata.InvalidData;
70
import edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SPSSConstants;
71

72

73
/**
74
 * ingest plugin for SPSS/POR ("portable") file format.
75
 *
76
 * This reader plugin has been fully re-implemented for the DVN 4.0;
77
 * It still borrows heavily from, and builds on the basis of the 
78
 * old implementation by Akio Sone, that was in use in the versions 
79
 * 2-3 of the DVN.
80
 * 
81
 * @author Akio Sone at UNC-Odum
82
 * @author Leonid Andreev
83
 */
84

85
public class PORFileReader  extends TabularDataFileReader{
86
    // static fields ---------------------------------------------------------//
87
    private static final String MissingValueForTextDataFile = "";
88

89
    private TabularDataIngest ingesteddata = new TabularDataIngest();
×
90
    private DataTable dataTable = new DataTable(); 
×
91
    
92
    private static final int POR_HEADER_SIZE = 500;   
93
    private static final int POR_MARK_POSITION_DEFAULT = 461;
94
    private static final String POR_MARK = "SPSSPORT";
95
    private static final int LENGTH_SECTION_HEADER = 1;
96
    private static final int LENGTH_SECTION_2 = 19;        
97
    private static final String MIME_TYPE = "application/x-spss-por";
98
    private static Pattern pattern4positiveInteger = Pattern.compile("[0-9A-T]+");
×
99
    private static Pattern pattern4Integer = Pattern.compile("[-]?[0-9A-T]+");
×
100
    private static Calendar GCO = new GregorianCalendar();
×
101
    static {
        // Move GCO to 1582-10-15 00:00:00.000 with a zero zone offset.
        // The Calendar constants below carry the same integer field codes
        // (1, 2, 5, 9, 10, 12, 13, 14, 15) that were previously spelled out.
        GCO.set(Calendar.YEAR, 1582);
        GCO.set(Calendar.MONTH, Calendar.OCTOBER); // months are zero-based: OCTOBER == 9
        GCO.set(Calendar.DAY_OF_MONTH, 15);
        GCO.set(Calendar.AM_PM, Calendar.AM);
        GCO.set(Calendar.HOUR, 0);
        GCO.set(Calendar.MINUTE, 0);
        GCO.set(Calendar.SECOND, 0);
        GCO.set(Calendar.MILLISECOND, 0);
        GCO.set(Calendar.ZONE_OFFSET, 0);
    }
114
    private static final long SPSS_DATE_BIAS = 60*60*24*1000;
115
    private static final long SPSS_DATE_OFFSET = SPSS_DATE_BIAS + Math.abs(GCO.getTimeInMillis());
×
116
    
117

118
    // instance fields -------------------------------------------------------//
119

120
    private static Logger dbgLog = Logger.getLogger(PORFileReader.class.getPackage().getName());
×
121

122
    private boolean isCurrentVariableString = false;
×
123
    private String currentVariableName = null;
×
124

125
    private int caseQnty=0;
×
126
    private int varQnty=0;
×
127

128
    private Map<String, Integer> variableTypeTable = new LinkedHashMap<>();
×
129
    private List<Integer> variableTypelList = new ArrayList<>();
×
130
    private List<Integer> printFormatList = new ArrayList<>();
×
131
    private Map<String, String> printFormatTable = new LinkedHashMap<>();
×
132
    private Map<String, String> printFormatNameTable = new LinkedHashMap<>();
×
133
    private Map<String, String> formatCategoryTable = new LinkedHashMap<>();
×
134
    private Map<String, Map<String, String>> valueLabelTable = new LinkedHashMap<>();
×
135
    private Map<String, String> valueVariableMappingTable = new LinkedHashMap<>();
×
136
    private List<String> variableNameList = new ArrayList<>();
×
137
    private Map<String, String> variableLabelMap = new LinkedHashMap<>();
×
138
    // missing value table: string/numeric data are stored  => String
139
    // the number of missing values is unknown beforehand => List
140
    private Map<String, List<String>> missingValueTable = new LinkedHashMap<>();
×
141
    // variableName=> missingValue type[field code]
142
    private Map<String, List<String>> missingValueCodeTable = new LinkedHashMap<>();
×
143
    private Map<String, InvalidData> invalidDataTable = new LinkedHashMap<>();
×
144
    private Set<Integer> decimalVariableSet = new HashSet<>();
×
145
    private List<Integer> formatDecimalPointPositionList= new ArrayList<>();
×
146

147

148

149
    // date/time data format
150
    private SimpleDateFormat sdf_ymd    = new SimpleDateFormat("yyyy-MM-dd");
×
151
    private SimpleDateFormat sdf_ymdhms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
×
152
    private SimpleDateFormat sdf_dhms   = new SimpleDateFormat("DDD HH:mm:ss");
×
153
    private SimpleDateFormat sdf_hms    = new SimpleDateFormat("HH:mm:ss");
×
154

155
    // DecimalFormat for doubles
156
    // may need more setXXXX() to handle scientific data
157
    private NumberFormat doubleNumberFormatter = new DecimalFormat();
×
158

159
    private String[] variableFormatTypeList;
160
    private String[] dateFormatList;
161

162
    private Map<String,String> extendedLabels;
163

164
    // Constructor -----------------------------------------------------------//
165

166
    /**
     * Creates a POR reader on behalf of the given service provider.
     *
     * @param originator the reader SPI handed on to the superclass constructor
     */
    public PORFileReader(TabularDataFileReaderSpi originator) {
        super(originator);
    }
×
169

170
    
171
    private void init() throws IOException {
172
        
173
        sdf_ymd.setTimeZone(TimeZone.getTimeZone("GMT"));
×
174
        sdf_ymdhms.setTimeZone(TimeZone.getTimeZone("GMT"));
×
175
        sdf_dhms.setTimeZone(TimeZone.getTimeZone("GMT"));
×
176
        sdf_hms.setTimeZone(TimeZone.getTimeZone("GMT"));
×
177
    
178
        doubleNumberFormatter.setGroupingUsed(false);
×
179
        doubleNumberFormatter.setMaximumFractionDigits(340); // TODO: 340?? -- L.A. 4.0 beta
×
180
    }
×
181
    
182
    @Override
183
    public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File additionalData) throws IOException{
184
        dbgLog.fine("PORFileReader: read() start");
×
185
        
186
        if (additionalData != null) {
×
187
            //throw new IOException ("this plugin does not support external raw data files");
188
            dbgLog.fine("Using extended variable labels from file "+additionalData.getName());
×
189
            
190
            extendedLabels = createLabelMap(additionalData);
×
191
        }
192
        
193
        
194
        File tempPORfile = decodeHeader(stream);
×
195
        BufferedReader bfReader = null;
×
196
        
197
        try {            
198
            bfReader = new BufferedReader(new InputStreamReader(new FileInputStream(tempPORfile.getAbsolutePath()), "US-ASCII"));
×
199
            if (bfReader == null){
×
200
                dbgLog.fine("bfReader is null");
×
201
                throw new IOException("bufferedReader is null");
×
202
            }
203
            
204
            decodeSec2(bfReader);
×
205
            
206
            while(true){
207

208
                char[] header = new char[LENGTH_SECTION_HEADER]; // 1 byte
×
209
                bfReader.read(header);
×
210
                String headerId = Character.toString(header[0]);
×
211
                
212
                dbgLog.fine("////////////////////// headerId="+headerId+ "//////////////////////");
×
213
                
214
                if (headerId.equals("Z")){
×
215
                    throw new IOException("reading failure: wrong headerId(Z) here");
×
216
                }
217
                
218
                if (headerId.equals("F")) {
×
219
                    // missing value
220
                    if ((missingValueTable !=null) && (missingValueTable.size()>0)){
×
221
                        processMissingValueData();
×
222
                    }
223
                }
224
                                
225
                if (headerId.equals("8") && isCurrentVariableString){
×
226
                    headerId = "8S";
×
227
                }
228

NEW
229
                decode(headerId, bfReader, storeWithVariableHeader);
×
230

231
                
232
                // for last iteration
233
                if (headerId.equals("F")){
×
234
                    // finished the last block (F == data) 
235
                    // without reaching the end of this file.
236
                    break;
×
237
                }
238
            }
×
239
            
240
                    
241
        } finally {
242
            try {
243
                if (bfReader!= null){
×
244
                    bfReader.close();
×
245
                }
246
            } catch (IOException ex){
×
247
                ex.printStackTrace();
×
248
            }
×
249

250
            if (tempPORfile.exists()){
×
251
                tempPORfile.delete();
×
252
            }
253
        }
254
        
255
        dbgLog.fine("done parsing headers and decoding;");
×
256

257
        List<DataVariable> variableList = new ArrayList<>();
×
258
        
259
        for (int indx = 0; indx < variableTypelList.size(); indx++) {
×
260
            
261
            DataVariable dv = new DataVariable(indx, dataTable);
×
262
            String varName = variableNameList.get(indx); 
×
263
            dv.setName(varName);
×
264
            String varLabel = variableLabelMap.get(varName);
×
265
            if (varLabel != null && varLabel.length() > 255) {
×
266
                varLabel = varLabel.substring(0, 255);
×
267
            } 
268
            // TODO: do we still need to enforce the 255 byte limit on 
269
            // labels? is that enough to store whatever they have 
270
            // in their POR files at ODUM?
271
            // -- L.A. 4.0, beta11
272
            if (extendedLabels != null && extendedLabels.get(varName) != null) {
×
273
                dv.setLabel(extendedLabels.get(varName));
×
274
            } else {
275
                dv.setLabel(varLabel);
×
276
            }
277
            
278
            variableList.add(dv);            
×
279
            
280
            int simpleType = 0;
×
281
            if (variableTypelList.get(indx) != null) {
×
282
                simpleType = variableTypelList.get(indx);
×
283
            }
284

285
            if (simpleType <= 0) {
×
286
                // We need to make one last type adjustment:
287
                // Dates and Times will be stored as character values in the 
288
                // dataverse tab files; even though they are not typed as 
289
                // strings at this point:
290
                // TODO: 
291
                // Make sure the date/time format is properly preserved!
292
                // (see the setFormatCategory below... but double-check!)
293
                // -- L.A. 4.0 alpha
294
                String variableFormatType = variableFormatTypeList[indx];
×
295
                
296
                if (variableFormatType != null) {
×
297
                    if (variableFormatType.equals("time")
×
298
                        || variableFormatType.equals("date")) {
×
299
                        simpleType = 1; 
×
300
                    
301
                        String formatCategory = formatCategoryTable.get(varName);
×
302

303
                        if (formatCategory != null) {
×
304
                            if (dateFormatList[indx] != null) {
×
305
                                dbgLog.fine("setting format category to "+formatCategory);
×
306
                                variableList.get(indx).setFormatCategory(formatCategory);
×
307
                                dbgLog.fine("setting formatschemaname to "+dateFormatList[indx]);
×
308
                                variableList.get(indx).setFormat(dateFormatList[indx]);
×
309
                            }
310
                        }
311
                    } else if (variableFormatType.equals("other")) {
×
312
                        dbgLog.fine("Variable of format type \"other\"; type adjustment may be needed");
×
313
                        dbgLog.fine("SPSS print format: "+printFormatTable.get(variableList.get(indx).getName()));
×
314
                        
315
                        if (printFormatTable.get(variableList.get(indx).getName()).equals("WKDAY")
×
316
                            || printFormatTable.get(variableList.get(indx).getName()).equals("MONTH")) {
×
317
                            // week day or month; 
318
                            // These are not treated as time/date values (meaning, we 
319
                            // don't define time/date formats for them; there's likely 
320
                            // no valid ISO time/date format for just a month or a day 
321
                            // of week). However, the
322
                            // values will be stored in the TAB files as strings, 
323
                            // and not as numerics - as they were stored in the 
324
                            // SAV file. So we need to adjust the type here.
325
                            // -- L.A. 
326
                            
327
                            simpleType = 1;
×
328
                        }
329
                    }
330
                }
331
                
332
            }
333
            
334
            dbgLog.fine("Finished creating variable "+indx+", "+varName);
×
335
            
336
            // OK, we can now assign the types: 
337
            
338
            if (simpleType > 0) {
×
339
                // String: 
340
                variableList.get(indx).setTypeCharacter();
×
341
                variableList.get(indx).setIntervalDiscrete();
×
342
            } else {
343
                // Numeric: 
344
                variableList.get(indx).setTypeNumeric();
×
345
                // discrete or continuous?
346
                // "decimal variables" become dataverse data variables of interval type "continuous":
347
        
348
                if (decimalVariableSet.contains(indx)) {
×
349
                    variableList.get(indx).setIntervalContinuous();
×
350
                } else {
351
                    variableList.get(indx).setIntervalDiscrete();
×
352
                }
353
                
354
            }
355
            dbgLog.fine("Finished configuring variable type information.");
×
356
        }
357
        
358
        
359
        dbgLog.fine("done configuring variables;");
×
360
        
361
        /* 
362
         * From the original (3.6) code: 
363
            //smd.setVariableTypeMinimal(ArrayUtils.toPrimitive(variableTypelList.toArray(new Integer[variableTypelList.size()])));
364
            smd.setVariableFormat(printFormatList);
365
            smd.setVariableFormatName(printFormatNameTable);
366
            smd.setVariableFormatCategory(formatCategoryTable);
367
            smd.setValueLabelMappingTable(valueVariableMappingTable);
368
         * TODO: 
369
         * double-check that it's all being taken care of by the new plugin!
370
         * (for variable format and formatName, consult the SAV plugin)
371
         */
372
        
373
        dataTable.setDataVariables(variableList);
×
374
        
375
        // Assign value labels: 
376
        
377
        assignValueLabels(valueLabelTable);
×
378
        
379
        ingesteddata.setDataTable(dataTable);
×
380
        
381
        dbgLog.info("PORFileReader: read() end");
×
382
        return ingesteddata;
×
383
    }
384
    
385
    private void decode(String headerId, BufferedReader reader, boolean storeWithVariableHeader) throws IOException{
386
        if (headerId.equals("1")) decodeProductName(reader);
×
387
        else if (headerId.equals("2")) decodeLicensee(reader);
×
388
        else if (headerId.equals("3")) decodeFileLabel(reader);
×
389
        else if (headerId.equals("4")) decodeNumberOfVariables(reader);
×
390
        else if (headerId.equals("5")) decodeFieldNo5(reader);
×
391
        else if (headerId.equals("6")) decodeWeightVariable(reader);
×
392
        else if (headerId.equals("7")) decodeVariableInformation(reader);
×
393
        else if (headerId.equals("8")) decodeMissValuePointNumeric(reader);
×
394
        else if (headerId.equals("8S")) decodeMissValuePointString(reader);
×
395
        else if (headerId.equals("9")) decodeMissValueRangeLow(reader);
×
396
        else if (headerId.equals("A")) decodeMissValueRangeHigh(reader);
×
397
        else if (headerId.equals("B")) decodeMissValueRange(reader);
×
398
        else if (headerId.equals("C")) decodeVariableLabel(reader);
×
399
        else if (headerId.equals("D")) decodeValueLabel(reader);
×
400
        else if (headerId.equals("E")) decodeDocument(reader);
×
NEW
401
        else if (headerId.equals("F")) decodeData(reader, storeWithVariableHeader);
×
402
    }
×
403
    
404

405
    private File decodeHeader(BufferedInputStream stream) throws IOException {
406
        dbgLog.fine("decodeHeader(): start");
×
407
        File tempPORfile = null;
×
408

409
        if (stream  == null){
×
410
            throw new IllegalArgumentException("file == null!");
×
411
        }
412
        
413
        byte[] headerByes = new byte[POR_HEADER_SIZE];
×
414

415
        if (stream.markSupported()){
×
416
            stream.mark(1000);
×
417
        }
418
        int nbytes = stream.read(headerByes, 0, POR_HEADER_SIZE);
×
419

420
        //printHexDump(headerByes, "hex dump of the byte-array");
421

422
        if (nbytes == 0){
×
423
            throw new IOException("decodeHeader: reading failure");
×
424
        } else if ( nbytes < 491) {
×
425
           // Size test: by defnition, it must have at least
426
            // 491-byte header, i.e., the file size less than this threshold
427
            // is not a POR file
428
           dbgLog.fine("this file is NOT spss-por type");
×
429
           throw new IllegalArgumentException("file is not spss-por type");
×
430
        }
431
        // rewind the current reading position back to the beginning
432
        if (stream.markSupported()){
×
433
            stream.reset();
×
434
        }
435

436
        // line-terminating characters are usually one or two by defnition
437
        // however, a POR file saved by a genuine SPSS for Windows
438
        // had a three-character line terminator, i.e., failed to remove the
439
        // original file's one-character terminator when it was opened, and
440
        // saved it with the default two-character terminator without
441
        // removing original terminators. So we have to expect such a rare
442
        // case
443
        //
444
        // terminator
445
        // windows [0D0A]=>   [1310] = [CR/LF]
446
        // unix    [0A]  =>   [10]
447
        // mac     [0D]  =>   [13]
448
        // 3char  [0D0D0A]=> [131310] spss for windows rel 15
449
        //
450
        // terminating characters should be found at the following
451
        //                             column positions[counting from 0]:
452
        // unix    case: [0A]   : [80], [161], [242], [323], [404], [485]
453
        // windows case: [0D0A] : [81], [163], [245], [327], [409], [491]
454
        //           : [0D0D0A] : [82], [165], [248], [331], [414], [495]
455
        
456
        // convert b into a ByteBuffer
457
        
458
        ByteBuffer buff = ByteBuffer.wrap(headerByes);
×
459
        byte[] nlch = new byte[36];
×
460
        int pos1;
461
        int pos2;
462
        int pos3;
463
        int ucase = 0;
×
464
        int wcase = 0;
×
465
        int mcase = 0;
×
466
        int three = 0;
×
467
        int nolines = 6;
×
468
        int nocols = 80;
×
469
        for (int i = 0; i < nolines; ++i) {
×
470
            int baseBias = nocols * (i + 1);
×
471
            // 1-char case
472
            pos1 = baseBias + i;
×
473
            buff.position(pos1);
×
474
            dbgLog.finer("\tposition(1)=" + buff.position());
×
475
            int j = 6 * i;
×
476
            nlch[j] = buff.get();
×
477

478
            if (nlch[j] == 10) {
×
479
                ucase++;
×
480
            } else if (nlch[j] == 13) {
×
481
                mcase++;
×
482
            }
483

484
            // 2-char case
485
            pos2 = baseBias + 2 * i;
×
486
            buff.position(pos2);
×
487
            dbgLog.finer("\tposition(2)=" + buff.position());
×
488
            
489
            nlch[j + 1] = buff.get();
×
490
            nlch[j + 2] = buff.get();
×
491

492
            // 3-char case
493
            pos3 = baseBias + 3 * i;
×
494
            buff.position(pos3);
×
495
            dbgLog.finer("\tposition(3)=" + buff.position());
×
496
            
497
            nlch[j + 3] = buff.get();
×
498
            nlch[j + 4] = buff.get();
×
499
            nlch[j + 5] = buff.get();
×
500

501
            dbgLog.finer(i + "-th iteration position =" +
×
502
                    nlch[j] + "\t" + nlch[j + 1] + "\t" + nlch[j + 2]);
503
            dbgLog.finer(i + "-th iteration position =" +
×
504
                    nlch[j + 3] + "\t" + nlch[j + 4] + "\t" + nlch[j + 5]);
505
            
506
            if ((nlch[j + 3] == 13) &&
×
507
                (nlch[j + 4] == 13) &&
508
                (nlch[j + 5] == 10)) {
509
                three++;
×
510
            } else if ((nlch[j + 1] == 13) && (nlch[j + 2] == 10)) {
×
511
                wcase++;
×
512
            }
513

514
            buff.rewind();
×
515
        }
516
        
517
        boolean windowsNewLine = true;
×
518
        if (three == nolines) {
×
519
            windowsNewLine = false; // lineTerminator = "0D0D0A"
×
520
        } else if ((ucase == nolines) && (wcase < nolines)) {
×
521
            windowsNewLine = false; // lineTerminator = "0A"
×
522
        } else if ((ucase < nolines) && (wcase == nolines)) {
×
523
            windowsNewLine = true; //lineTerminator = "0D0A"
×
524
        } else if ((mcase == nolines) && (wcase < nolines)) {
×
525
            windowsNewLine = false; //lineTerminator = "0D"
×
526
        }
527

528

529
        buff.rewind();
×
530
        int PORmarkPosition = POR_MARK_POSITION_DEFAULT;
×
531
        if (windowsNewLine) {
×
532
            PORmarkPosition = PORmarkPosition + 5;
×
533
        } else if (three == nolines) {
×
534
            PORmarkPosition = PORmarkPosition + 10;
×
535
        }
536

537
        byte[] pormark = new byte[8];
×
538
        buff.position(PORmarkPosition);
×
539
        buff.get(pormark, 0, 8);
×
540
        String pormarks = new String(pormark);
×
541

542
        //dbgLog.fine("pormark =>" + pormarks + "<-");
543
        dbgLog.fine("pormark[hex: 53 50 53 53 50 4F 52 54 == SPSSPORT] =>" +
×
544
                new String(Hex.encodeHex(pormark)) + "<-");
×
545

546
        if (pormarks.equals(POR_MARK)) {
×
547
            dbgLog.fine("POR ID toke test: Passed");
×
548
            init();
×
549
                        
550
            dataTable.setOriginalFileFormat(MIME_TYPE);
×
551
            dataTable.setUnf("UNF:6:NOTCALCULATED");
×
552

553
        } else {
554
            dbgLog.fine("this file is NOT spss-por type");
×
555
            throw new IllegalArgumentException(
×
556
                "decodeHeader: POR ID token was not found");
557
        }
558

559
        // save the POR file without new line characters
560

561
        FileOutputStream fileOutPOR = null;
×
562
        Writer fileWriter = null;
×
563

564
        // Scanner class can handle three-character line-terminator
565
        Scanner porScanner = null;
×
566
        
567
        try {
568
            tempPORfile = File.createTempFile("tempPORfile.", ".por");
×
569
            fileOutPOR = new FileOutputStream(tempPORfile);
×
570
            fileWriter = new BufferedWriter(new OutputStreamWriter(fileOutPOR, "utf8"));
×
571
            porScanner = new Scanner(stream);
×
572

573
            // Because 64-bit and 32-bit machines decode POR's first 40-byte
574
            // sequence differently, the first 5 leader lines are skipped from
575
            // the new-line-stripped file
576

577
            int lineCounter= 0;
×
578
            while(porScanner.hasNextLine()){
×
579
                lineCounter++;
×
580
                if (lineCounter<=5){
×
581
                    String line = porScanner.nextLine();
×
582
                    dbgLog.fine("line="+lineCounter+":"+line.length()+":"+line);
×
583
                } else {
×
584
                    fileWriter.write(porScanner.nextLine());
×
585
                }
586
            }
587
        } finally {
588
            try{
589
                if (fileWriter != null){
×
590
                    fileWriter.close();
×
591
                }
592
            } catch (IOException ex){
×
593
                ex.printStackTrace();
×
594
            }
×
595

596
            if (porScanner != null){
×
597
                porScanner.close();
×
598
            }
599
        }
600

601
        return tempPORfile;
×
602
    }
603

604

605

606
    private void decodeSec2(BufferedReader reader) throws IOException {
607
        dbgLog.fine("decodeSec2(): start");
×
608
        if (reader ==null){
×
609
            throw new IllegalArgumentException("decodeSec2: stream == null!");
×
610
        }
611

612
        // Because a 64-bit machine may not save the first 40
613
        // bytes of a POR file in a way as a 32-bit machine does,
614
        // the first 5 lines of a POR file is excluded from the read-back
615
        // file and the new 1st line contains the format mark "SPSSPORT"
616
        // somewhere in it.
617

618
        // mark the start position for the later rewind
619
        if (reader.markSupported()){
×
620
            reader.mark(100000);
×
621
        }
622

623

624
        char[] sixthLineCharArray = new char[80];
×
625
        int nbytes_sixthLine = reader.read(sixthLineCharArray);
×
626

627
        String sixthLine = new String(sixthLineCharArray);
×
628
        dbgLog.fine("sixthLineCharArray="+
×
629
            Arrays.deepToString(ArrayUtils.toObject(sixthLineCharArray)));
×
630
        int signatureLocation = sixthLine.indexOf(POR_MARK);
×
631

632
        if (signatureLocation >= 0){
×
633
            dbgLog.fine("format signature was found at:"+signatureLocation);
×
634
        } else {
635
            dbgLog.severe("signature string was not found");
×
636
            throw new IOException("signature string was not found");
×
637
        }
638

639
        // rewind the position to the beginning
640
        reader.reset();
×
641

642
        // skip bytes up to the signature string
643
        long skippedBytes = reader.skip(signatureLocation);
×
644

645
        char[] sec2_leader = new char[POR_MARK.length()];
×
646
        int nbytes_sec2_leader = reader.read(sec2_leader);
×
647

648
        String leader_string = new String(sec2_leader);
×
649

650
        dbgLog.fine("format signature [SPSSPORT] detected="+leader_string);
×
651

652

653
        if (leader_string.equals("SPSSPORT")){
×
654
            dbgLog.fine("signature was correctly detected");
×
655

656
        } else {
657
            dbgLog.severe(
×
658
            "the format signature is not found at the previously located column");
659
            throw new IOException("decodeSec2: failed to find the signature string");
×
660
        }
661

662
        int length_section_2 = LENGTH_SECTION_2;
×
663

664
        char[] Sec2_bytes = new char[length_section_2];
×
665

666
        int nbytes_sec2 = reader.read(Sec2_bytes);
×
667

668
        if (nbytes_sec2 == 0){
×
669
            dbgLog.severe("decodeSec2: reading error");
×
670
            throw new IOException("decodeSec2: reading error");
×
671
        } else {
672
            dbgLog.fine("bytes read="+nbytes_sec2);
×
673
        }
674

675
        String sec2 = new String(Sec2_bytes);
×
676
        dbgLog.fine("sec2[creation date/time]="+sec2);
×
677

678
        // sec2
679
        //       0123456789012345678
680
        //       A8/YYYYMMDD6/HHMMSS
681
        // thus
682
        // section2 should has 3 elements
683

684
        String[] section2 = StringUtils.split(sec2, '/');
×
685

686
        dbgLog.fine("section2="+StringUtils.join(section2, "|"));
×
687

688
        String fileCreationDate =null;
×
689
        String fileCreationTime = null;
×
690
        if ((section2.length == 3)&& (section2[0].startsWith("A"))){
×
691
            fileCreationDate = section2[1].substring(0,7);
×
692
            fileCreationTime = section2[2];
×
693
        } else {
694
            dbgLog.severe("decodeSec2: file creation date/time were not correctly detected");
×
695
            throw new IOException("decodeSec2: file creation date/time were not correctly detected");
×
696
        }
697
        dbgLog.fine("fileCreationDate="+fileCreationDate);
×
698
        dbgLog.fine("fileCreationTime="+fileCreationTime);
×
699
        ///smd.getFileInformation().put("fileCreationDate", fileCreationDate);
700
        ///smd.getFileInformation().put("fileCreationTime", fileCreationTime);
701
        ///smd.getFileInformation().put("varFormat_schema", "SPSS");
702
        dbgLog.fine("decodeSec2(): end");
×
703
    }
×
704

705

706
    private void decodeProductName(BufferedReader reader) throws IOException {
707
        if (reader ==null){
×
708
            throw new IllegalArgumentException("decodeProductName: reader == null!");
×
709
        }
710

711
        String productName = parseStringField(reader);
×
712
        ///smd.getFileInformation().put("productName", productName);
713
    }
×
714

715

716
    private void decodeLicensee(BufferedReader reader) throws IOException {
717
        if (reader ==null){
×
718
            throw new IllegalArgumentException("decodeLicensee: reader == null!");
×
719
        }
720

721
        String licenseeName = parseStringField(reader);
×
722
        ///smd.getFileInformation().put("licenseeName", licenseeName);
723
    }
×
724

725

726
    private void decodeFileLabel(BufferedReader reader) throws IOException {
727
        if (reader ==null){
×
728
            throw new IllegalArgumentException("decodeFileLabel: reader == null!");
×
729
        }
730

731
        String fileLabel = parseStringField(reader);     
×
732
        // TODO: is this "file label" potentially useful? -- L.A. 4.0 beta
733
        ///smd.getFileInformation().put("fileLabel", fileLabel);
734
    }
×
735

736

737
    private void decodeNumberOfVariables(BufferedReader reader) throws IOException {
738
        if (reader ==null){
×
739
            throw new IllegalArgumentException("decodeNumberOfVariables: reader == null!");
×
740
        }
741
        
742
        String temp = null;
×
743
        char[] tmp = new char[1];
×
744
        StringBuilder sb = new StringBuilder();
×
745

746
        while (reader.read(tmp) > 0) {
×
747
            temp = Character.toString(tmp[0]);
×
748
            if (temp.equals("/")) {
×
749
                break;
×
750
            } else {
751
                sb.append(temp);
×
752
            }
753
        }
754

755
        String rawNumberOfVariables = sb.toString();
×
756
        int rawLength = rawNumberOfVariables.length();
×
757

758
        String numberOfVariables = StringUtils.stripStart((StringUtils.strip(rawNumberOfVariables)), "0");
×
759
        
760
        if ((numberOfVariables.equals("")) && (numberOfVariables.length() == rawLength)){
×
761
            numberOfVariables ="0";
×
762
        }
763

764
        varQnty = Integer.valueOf(numberOfVariables, 30);
×
765
        dataTable.setVarQuantity(Long.valueOf(numberOfVariables, 30));
×
766
    }
×
767

768

769
    private void decodeFieldNo5(BufferedReader reader) throws IOException {
770
        if (reader ==null){
×
771
            throw new IllegalArgumentException("decodeFieldNo5: reader == null!");
×
772
        }    
773
        
774
        int field5 = parseNumericField(reader);
×
775
    }
×
776

777

778
    private void decodeWeightVariable(BufferedReader reader) throws IOException {
779
        if (reader ==null){
×
780
            throw new IllegalArgumentException("decodeWeightVariable: reader == null!");
×
781
        }    
782
        
783
        String weightVariableName = parseStringField(reader);
×
784
        // TODO: make sure case weight variables are properly handled! 
785
        // -- L.A. 4.0 beta
786
        ///smd.getFileInformation().put("caseWeightVariableName", weightVariableName);
787
        ///smd.setCaseWeightVariableName(weightVariableName);
788
    }
×
789

790

791
    private void decodeVariableInformation(BufferedReader reader) throws IOException {
792
        if (reader ==null){
×
793
            throw new IllegalArgumentException("decodeVariableInformation: reader == null!");
×
794
        } 
795

796
        // step 1: variable type
797
        int variableType = parseNumericField(reader);
×
798
        variableTypelList.add(variableType);
×
799
        isCurrentVariableString = (variableType > 0);
×
800
            
801
            
802
        // step 2: variable name            
803
        String variableName = parseStringField(reader);
×
804
        currentVariableName = variableName;
×
805
        variableNameList.add(variableName);
×
806
        variableTypeTable.put(variableName,variableType);
×
807
           
808
        // step 3: format(print/write)
809
        int[] printWriteFormatTable = new int[6];
×
810
        for (int i=0; i < 6; i++){
×
811
            printWriteFormatTable[i]= parseNumericField(reader);
×
812
        }
813

814
        int formatCode = printWriteFormatTable[0];
×
815
        int formatWidth = printWriteFormatTable[1];
×
816
        int formatDecimalPointPosition = printWriteFormatTable[2];
×
817

818
        formatDecimalPointPositionList.add(formatDecimalPointPosition);
×
819
        if (!SPSSConstants.FORMAT_CODE_TABLE_POR.containsKey(formatCode)){
×
820
                throw new IOException("Unknown format code was found = " + formatCode);
×
821
        } else {
822
            printFormatList.add(printWriteFormatTable[0]);
×
823
        }
824

825
        if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)){
×
826
            StringBuilder sb = new StringBuilder(SPSSConstants.FORMAT_CODE_TABLE_POR.get(formatCode) + formatWidth);
×
827
            if (formatDecimalPointPosition > 0){
×
828
                sb.append("."+ formatDecimalPointPosition);
×
829
            }
830
            printFormatNameTable.put(variableName, sb.toString());
×
831
        }
832

833
        printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_POR.get(formatCode));
×
834
    }
×
835

836

837
    private void decodeMissValuePointNumeric(BufferedReader reader) throws IOException {
838
        if (reader ==null){
×
839
            throw new IllegalArgumentException("decodeMissValuePointNumeric: reader == null!");
×
840
        }
841
        
842
        if (missingValueCodeTable.containsKey(currentVariableName)){
×
843
            missingValueCodeTable.get(currentVariableName).add("8");
×
844
        } else {
845
            List<String> mvc = new ArrayList<>();
×
846
            mvc.add("8");
×
847
            missingValueCodeTable.put(currentVariableName, mvc);
×
848
        }
849

850
        String missingValuePoint=null;
×
851

852
        // missing values are not always integers
853
        String base30value = getNumericFieldAsRawString(reader);
×
854
        if (base30value.contains(".")){
×
855
            missingValuePoint = doubleNumberFormatter.format(base30Tobase10Conversion(base30value));
×
856
        } else {
857
            missingValuePoint= Integer.valueOf(base30value, 30).toString();
×
858
        }
859

860
        if (missingValueTable.containsKey(currentVariableName)){
×
861
            // already stored
862
            (missingValueTable.get(currentVariableName)).add(missingValuePoint);
×
863
        } else {
864
            // no missing value stored
865
            List<String> mv = new ArrayList<>();
×
866
            mv.add(missingValuePoint);
×
867
            missingValueTable.put(currentVariableName, mv);
×
868
        }
869
    }
×
870

871

872
    private void decodeMissValuePointString(BufferedReader reader) throws IOException {
873
        if (reader ==null){
×
874
            throw new IllegalArgumentException("decodeMissValuePointString: reader == null!");
×
875
        }    
876
        
877
        if (missingValueCodeTable.containsKey(currentVariableName)){
×
878
            missingValueCodeTable.get(currentVariableName).add("8");
×
879
        } else {
880
            List<String> mvc = new ArrayList<>();
×
881
            mvc.add("8");
×
882
            missingValueCodeTable.put(currentVariableName, mvc);
×
883
        }
884
        
885
        String missingValuePointString  = parseStringField(reader);
×
886
        
887
        if (missingValueTable.containsKey(currentVariableName)){
×
888
            // already stored
889
            (missingValueTable.get(currentVariableName)).add(missingValuePointString);
×
890
        } else {
891
            // no missing value stored
892
            List<String> mv = new ArrayList<>();
×
893
            mv.add(missingValuePointString);
×
894
            missingValueTable.put(currentVariableName, mv);
×
895
        }
896
    }
×
897

898

899
    private void decodeMissValueRangeLow(BufferedReader reader) throws IOException {
900
        if (reader ==null){
×
901
            throw new IllegalArgumentException("decodeMissValueRangeLow: reader == null!");
×
902
        }
903
        
904
        if (missingValueCodeTable.containsKey(currentVariableName)){
×
905
            missingValueCodeTable.get(currentVariableName).add("9");
×
906
        } else {
907
            List<String> mvc = new ArrayList<>();
×
908
            mvc.add("9");
×
909
            missingValueCodeTable.put(currentVariableName, mvc);
×
910
        }
911

912
        String missingValueRangeLOtype=null;
×
913

914
        // missing values are not always integers
915
        String base30value = getNumericFieldAsRawString(reader);
×
916

917
        if (base30value.contains(".")){
×
918
            missingValueRangeLOtype = doubleNumberFormatter.format(base30Tobase10Conversion(base30value));
×
919
        } else {
920
            missingValueRangeLOtype= Integer.valueOf(base30value, 30).toString();
×
921
        }
922
        
923
        if (missingValueTable.containsKey(currentVariableName)){
×
924
            // already stored
925
            (missingValueTable.get(currentVariableName)).add("LOWEST");
×
926
            (missingValueTable.get(currentVariableName)).add(missingValueRangeLOtype);
×
927
        } else {
928
            // no missing value stored
929
            List<String> mv = new ArrayList<>();
×
930
            mv.add("LOWEST");
×
931
            mv.add(missingValueRangeLOtype);
×
932
            missingValueTable.put(currentVariableName, mv);
×
933
        }
934
    }
×
935

936

937
    private void decodeMissValueRangeHigh(BufferedReader reader) throws IOException {
938
        if (reader ==null){
×
939
            throw new IllegalArgumentException("decodeMissValueRangeHigh: reader == null!");
×
940
        }
941
        
942
        if (missingValueCodeTable.containsKey(currentVariableName)){
×
943
            missingValueCodeTable.get(currentVariableName).add("A");
×
944
        } else {
945
            List<String> mvc = new ArrayList<>();
×
946
            mvc.add("A");
×
947
            missingValueCodeTable.put(currentVariableName, mvc);
×
948
        }
949

950
        String missingValueRangeHItype = null;
×
951

952
        // missing values are not always integers
953
        String base30value = getNumericFieldAsRawString(reader);
×
954

955
        if (base30value.contains(".")){
×
956
            missingValueRangeHItype = doubleNumberFormatter.format(base30Tobase10Conversion(base30value));
×
957
        } else {
958
            missingValueRangeHItype= Integer.valueOf(base30value, 30).toString();
×
959
        }
960

961
        if (missingValueTable.containsKey(currentVariableName)){
×
962
            // already stored
963
            (missingValueTable.get(currentVariableName)).add(missingValueRangeHItype);
×
964
            (missingValueTable.get(currentVariableName)).add("HIGHEST");
×
965
        } else {
966
            // no missing value stored
967
           List<String> mv = new ArrayList<>();
×
968
           mv.add(missingValueRangeHItype);
×
969
           mv.add("HIGHEST");
×
970
           missingValueTable.put(currentVariableName, mv);
×
971
        }
972
    }
×
973
    
974
    
975
    private void decodeMissValueRange(BufferedReader reader) throws IOException {
976
        if (reader ==null){
×
977
            throw new IllegalArgumentException("decodeMissValueRange: reader == null!");
×
978
        }
979

980
        if (missingValueCodeTable.containsKey(currentVariableName)){
×
981
            missingValueCodeTable.get(currentVariableName).add("B");
×
982
        } else {
983
            List<String> mvc = new ArrayList<>();
×
984
            mvc.add("B");
×
985
            missingValueCodeTable.put(currentVariableName, mvc);
×
986
        }
987
        
988
        String[] missingValueRange = new String[2];
×
989

990
       // missing values are not always integers
991
        String base30value0 = getNumericFieldAsRawString(reader);
×
992

993
        if (base30value0.contains(".")){
×
994
            missingValueRange[0] = doubleNumberFormatter.format(base30Tobase10Conversion(base30value0));
×
995
        } else {
996
            missingValueRange[0]= Integer.valueOf(base30value0, 30).toString();
×
997
        }
998

999
        String base30value1 = getNumericFieldAsRawString(reader);
×
1000

1001
        if (base30value1.contains(".")){
×
1002
            missingValueRange[1] = doubleNumberFormatter.format(base30Tobase10Conversion(base30value1));
×
1003
        } else {
1004
            missingValueRange[1]= Integer.valueOf(base30value1, 30).toString();
×
1005
        }
1006

1007
        if (missingValueTable.containsKey(currentVariableName)){
×
1008
            // already stored
1009
            (missingValueTable.get(currentVariableName)).add(missingValueRange[0]);
×
1010
            (missingValueTable.get(currentVariableName)).add(missingValueRange[1]);
×
1011
        } else {
1012
            // no missing value stored
1013
           List<String> mv = new ArrayList<>();
×
1014
           mv.add(missingValueRange[0]);
×
1015
           mv.add(missingValueRange[1]);
×
1016
           missingValueTable.put(currentVariableName, mv);
×
1017
        }
1018
    }
×
1019
    
1020

1021
    private void decodeVariableLabel(BufferedReader reader) throws IOException {
1022
        if (reader ==null){
×
1023
            throw new IllegalArgumentException("decodeVariableLabel: reader == null!");
×
1024
        }    
1025

1026
        String variableLabel = parseStringField(reader);
×
1027
        variableLabelMap.put(currentVariableName, variableLabel);
×
1028
        // note: not all variables have their variable label; therefore,
1029
        // saving them to the metatadata object is done within read() method
1030

1031
    }
×
1032
    
1033
    
1034
    private void decodeValueLabel(BufferedReader reader) throws IOException {
1035
        Map<String, String> valueLabelSet = new LinkedHashMap<>();
×
1036
        
1037
        int numberOfVariables = parseNumericField(reader);
×
1038
        String[] variableNames = new String[numberOfVariables];
×
1039

1040
        for (int i= 0; i< numberOfVariables; i++){
×
1041
            variableNames[i] = parseStringField(reader);
×
1042
        }
1043

1044
        int numberOfvalueLabelSets = parseNumericField(reader);
×
1045
        boolean isStringType = variableTypeTable.get(variableNames[0]) > 0;
×
1046

1047
        for (int i=0; i<numberOfvalueLabelSets ;i++){
×
1048
            String[] tempValueLabel = new String[2];
×
1049
            if (isStringType){
×
1050
                // String case
1051
                tempValueLabel[0] = parseStringField(reader);
×
1052
            } else {
1053
                // Numeric case
1054
                // values may not be always integers
1055
                String base30value = getNumericFieldAsRawString(reader);
×
1056

1057
                Matcher matcher = pattern4Integer.matcher(base30value);
×
1058

1059
                if (matcher.matches()) {
×
1060
                    // integer case
1061
                    tempValueLabel[0] = Long.valueOf(base30value, 30).toString();
×
1062
                } else {
1063
                    // double case
1064
                    tempValueLabel[0] = doubleNumberFormatter.format(base30Tobase10Conversion(base30value));
×
1065
                }
1066
            }
1067

1068

1069
            tempValueLabel[1] = parseStringField(reader);
×
1070
            valueLabelSet.put(tempValueLabel[0],tempValueLabel[1]);
×
1071
        }
1072
        // save the value-label mapping list
1073
        // use the first variable name as the key
1074
        valueLabelTable.put(variableNames[0], valueLabelSet);
×
1075

1076
        // create a mapping table that finds the key variable for this mapping table
1077
        for (String vn : variableNames){
×
1078
            valueVariableMappingTable.put(vn, variableNames[0]);
×
1079
        }
1080
    }
×
1081

1082

1083
    private void decodeDocument(BufferedReader reader) throws IOException {
1084
        if (reader ==null){
×
1085
            throw new IllegalArgumentException("decodeVariableLabel: reader == null!");
×
1086
        }    
1087
        
1088
        int noOfdocumentLines = parseNumericField(reader);
×
1089
        String[] document = new String[noOfdocumentLines];
×
1090

1091
        for (int i= 0; i< noOfdocumentLines; i++){
×
1092
            document[i] = parseStringField(reader);
×
1093
        }
1094

1095
        // TODO: 
1096
        // verify if this "document" is any useful potentially. 
1097
        // -- L.A. 4.0 beta
1098
        ///smd.getFileInformation().put("document", StringUtils.join(document," " ));
1099
    }
×
1100

1101

1102
    private void decodeData(BufferedReader reader, boolean storeWithVariableHeader) throws IOException {
1103
        dbgLog.fine("decodeData(): start");
×
1104
        // TODO: get rid of this "variableTypeFinal"; -- L.A. 4.0 beta
1105
        int[] variableTypeFinal= new int[varQnty];
×
1106
        dateFormatList = new String[varQnty];
×
1107

1108
        // create a File object to save the tab-delimited data file
1109
        File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");
×
1110
        ingesteddata.setTabDelimitedFile(tabDelimitedDataFile);
×
1111
        
1112

1113
        FileOutputStream fileOutTab = null;
×
1114
        PrintWriter pwout = null;
×
1115

1116
        try {
1117
            fileOutTab = new FileOutputStream(tabDelimitedDataFile);
×
1118
            pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);
×
1119

1120
            variableFormatTypeList = new String[varQnty];
×
1121
            for (int i = 0; i < varQnty; i++) {
×
1122
                variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE.get(printFormatTable.get(variableNameList.get(i)));
×
1123
                formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
×
1124
            }
1125

1126
            // contents (variable) checker concering decimals
1127
            Arrays.fill(variableTypeFinal, 0);
×
1128

NEW
1129
            if (storeWithVariableHeader) {
×
NEW
1130
                pwout.println(StringUtils.join(variableNameList, "\t"));
×
1131
            } 
1132
            // raw-case counter
1133
            int j = 0; // case
×
1134

1135
            // use while instead for because the number of cases (observations) is usually unknown
1136
            FBLOCK: while(true){
1137
                j++;
×
1138

1139
                // case(row)-wise storage object; to be updated after each row-reading
1140

1141
                String[] casewiseRecordForTabFile = new String[varQnty];
×
1142
                // warning: the above object is later shallow-copied to the
1143
                // data object for calculating a UNF value/summary statistics
1144
                //
1145

1146
                for (int i=0; i<varQnty; i++){
×
1147
                    // check the type of this variable
1148
                    boolean isStringType = variableTypeTable.get(variableNameList.get(i)) > 0;
×
1149

1150
                    if (isStringType){
×
1151
                        // String case
1152
                        variableTypeFinal[i]=-1;
×
1153

1154
                        StringBuilder sb_StringLengthBase30 = new StringBuilder("");
×
1155
                        int stringLengthBase10 = 0;
×
1156
                        String buffer = "";
×
1157
                        char[] tmp = new char[1];
×
1158

1159
                        int nint;
1160
                        while((nint = reader.read(tmp))>0){
×
1161
                            buffer =  Character.toString(tmp[0]);
×
1162
                            if (buffer.equals("/")){
×
1163
                                break;
×
1164
                            } else if (buffer.equals("Z")){
×
1165
                                if (i == 0){
×
1166
                                    // the reader has passed the last case; subtract 1 from the j counter
1167
                                    caseQnty = j-1;
×
1168
                                    break FBLOCK;
×
1169
                                }
1170
                            } else {
1171
                                sb_StringLengthBase30.append(buffer);
×
1172
                            }
1173

1174

1175
                        }
1176

1177
                        if (nint == 0){
×
1178
                            // no more data to be read (reached the eof)
1179
                            caseQnty = j - 1;
×
1180
                            break FBLOCK;
×
1181
                        }
1182

1183

1184
                        dbgLog.finer(j+"-th case "+i+"=th var:datum length=" +sb_StringLengthBase30.toString());
×
1185

1186
                        // this length value should be a positive integer
1187
                        Matcher mtr = pattern4positiveInteger.matcher(sb_StringLengthBase30.toString());
×
1188
                        if (mtr.matches()){
×
1189
                            stringLengthBase10 = Integer.valueOf(sb_StringLengthBase30.toString(), 30);
×
1190
                        } else{
1191
                            // reading error case
1192
                            throw new IOException("reading F(data) section: string: length is not integer");
×
1193
                        }
1194

1195
                        // read this string-variable's contents after "/"
1196
                        char[] char_datumString = new char[stringLengthBase10];
×
1197
                        reader.read(char_datumString);
×
1198

1199
                        String datum = new String(char_datumString);
×
1200
                        casewiseRecordForTabFile[i] =  "\"" + datum.replaceAll("\"",Matcher.quoteReplacement("\\\"")) + "\"";
×
1201
                        // end of string case
1202
                    } else {
×
1203

1204
                        // numeric case
1205
                        StringBuilder sb_datumNumericBase30 = new StringBuilder("");
×
1206
                        boolean isMissingValue = false;
×
1207
                        String datum = null;
×
1208
                        String datumForTabFile = null;
×
1209
                        String datumDateFormat = null;
×
1210

1211
                        String buffer = "";
×
1212
                        char[] tmp = new char[1];
×
1213
                        int nint;
1214
                        while((nint = reader.read(tmp))>0){
×
1215
                            sb_datumNumericBase30.append(buffer);
×
1216
                            buffer = Character.toString(tmp[0]);
×
1217

1218
                            if (buffer.equals("/")){
×
1219
                                break;
×
1220
                            } else if (buffer.equals("Z")){
×
1221
                                if (i == 0){
×
1222
                                    // the reader has passed the last case
1223
                                    // subtract 1 from the j counter
1224
                                    dbgLog.fine("Z-mark was detected");
×
1225
                                    caseQnty = j-1;
×
1226
                                    break FBLOCK;
×
1227
                                }
1228
                            } else if (buffer.equals("*")) {
×
1229
                                // '*' is the first character of the system missing value
1230
                                datumForTabFile = MissingValueForTextDataFile;
×
1231
                                datum = null;
×
1232
                                isMissingValue = true;
×
1233

1234
                               // read next char '.' as part of the missing value
1235
                                reader.read(tmp);
×
1236
                                buffer = Character.toString(tmp[0]);
×
1237
                                break;
×
1238
                            }
1239

1240
                        }
1241
                        if (nint == 0){
×
1242
                            // no more data to be read; reached the eof
1243
                            caseQnty = j - 1;
×
1244
                            break FBLOCK;
×
1245
                        }
1246

1247
                        // follow-up process for non-missing-values
1248
                        if (!isMissingValue) {
×
1249
                            // decode a numeric datum as String
1250
                            String datumNumericBase30 = sb_datumNumericBase30.toString();
×
1251
                            Matcher matcher = pattern4Integer.matcher(datumNumericBase30);
×
1252

1253
                            if (matcher.matches()){
×
1254
                                // integer case
1255
                                datum = Long.valueOf(datumNumericBase30, 30).toString();
×
1256
                            } else {
1257
                                // double case
1258
                                datum = doubleNumberFormatter.format(base30Tobase10Conversion(datumNumericBase30));
×
1259
                            }
1260

1261
                            // now check format (if date or time)
1262
                            String variableFormatType = variableFormatTypeList[i];
×
1263

1264
                            if (variableFormatType.equals("date")){
×
1265
                                variableTypeFinal[i]=-1;
×
1266
                                long dateDatum = Long.parseLong(datum)*1000L- SPSS_DATE_OFFSET;
×
1267
                                datum = sdf_ymd.format(new Date(dateDatum));
×
1268
                                datumDateFormat = sdf_ymd.toPattern();
×
1269

1270
                            } else if (variableFormatType.equals("time")) {
×
1271
                                variableTypeFinal[i]=-1;
×
1272
                                int formatDecimalPointPosition = formatDecimalPointPositionList.get(i);
×
1273

1274
                                if (printFormatTable.get(variableNameList.get(i)).equals("DTIME")){
×
1275

1276
                                    if (datum.indexOf(".") < 0){
×
1277
                                        long dateDatum  = Long.parseLong(datum)*1000L - SPSS_DATE_BIAS;
×
1278
                                        datum = sdf_dhms.format(new Date(dateDatum));
×
1279
                                        // don't save date format for dtime
1280
                                    } else {
×
1281
                                        // decimal point included
1282
                                        String[] timeData = datum.split("\\.");
×
1283
                                        long dateDatum = Long.parseLong(timeData[0])*1000L - SPSS_DATE_BIAS;
×
1284
                                        StringBuilder sb_time = new StringBuilder(sdf_dhms.format(new Date(dateDatum)));
×
1285

1286
                                        if (formatDecimalPointPosition > 0){
×
1287
                                            sb_time.append("."+timeData[1].substring(0,formatDecimalPointPosition));
×
1288
                                        }
1289

1290
                                        datum = sb_time.toString();
×
1291
                                        // DTIME is weird date/time format that no one uses outside of 
1292
                                        // SPSS; so we are not even going to bother trying to save
1293
                                        // this variable as a datetime. 
1294
                                    }
×
1295

1296
                                } else if (printFormatTable.get(variableNameList.get(i)).equals("DATETIME")){
×
1297
                                    // TODO: 
1298
                                    // (for both datetime and "dateless" time)
1299
                                    // keep the longest of the matching formats - i.e., if there are *some*
1300
                                    // values in the vector that have thousands of a second, that should be 
1301
                                    // part of the saved format!
1302
                                    //  -- L.A. Aug. 12 2014 
1303

1304
                                    if (!datum.contains(".")){
×
1305
                                        long dateDatum  = Long.parseLong(datum)*1000L - SPSS_DATE_OFFSET;
×
1306
                                        datum = sdf_ymdhms.format(new Date(dateDatum));
×
1307
                                        datumDateFormat = sdf_ymdhms.toPattern();
×
1308
                                    } else {
×
1309
                                        // decimal point included
1310
                                        String[] timeData = datum.split("\\.");
×
1311
                                        long dateDatum = Long.parseLong(timeData[0])*1000L- SPSS_DATE_OFFSET;
×
1312
                                        StringBuilder sb_time = new StringBuilder(sdf_ymdhms.format(new Date(dateDatum)));
×
1313

1314
                                        if (formatDecimalPointPosition > 0){
×
1315
                                            sb_time.append("."+timeData[1].substring(0,formatDecimalPointPosition));
×
1316
                                        }
1317

1318
                                        datum = sb_time.toString();
×
1319
                                        datumDateFormat = sdf_ymdhms.toPattern() + (formatDecimalPointPosition > 0 ? ".S" : "" );
×
1320
                                    }
×
1321

1322
                                } else if (printFormatTable.get(variableNameList.get(i)).equals("TIME")){
×
1323

1324
                                    if (!datum.contains(".")){
×
1325
                                        long dateDatum = Long.parseLong(datum)*1000L;
×
1326
                                        datum = sdf_hms.format(new Date(dateDatum));
×
1327
                                        datumDateFormat = sdf_hms.toPattern();
×
1328
                                    } else {
×
1329
                                        // decimal point included
1330
                                        String[] timeData = datum.split("\\.");
×
1331
                                        long dateDatum = Long.parseLong(timeData[0])*1000L;
×
1332
                                        StringBuilder sb_time = new StringBuilder(sdf_hms.format(new Date(dateDatum)));
×
1333

1334
                                        if (formatDecimalPointPosition > 0){
×
1335
                                            sb_time.append("."+timeData[1].substring(0,formatDecimalPointPosition));
×
1336
                                        }
1337

1338
                                        datum = sb_time.toString();
×
1339
                                        datumDateFormat = sdf_hms.toPattern() + (formatDecimalPointPosition > 0 ? ".S" : "" );
×
1340
                                    }
1341
                                }
1342

1343
                            } else if (variableFormatType.equals("other")){
×
1344

1345
                                if (printFormatTable.get(variableNameList.get(i)).equals("WKDAY")){
×
1346
                                    // day of week
1347
                                    variableTypeFinal[i]=-1;
×
1348
                                    datum = SPSSConstants.WEEKDAY_LIST.get(Integer.valueOf(datum)-1);
×
1349

1350
                                } else if (printFormatTable.get(variableNameList.get(i)).equals("MONTH")){
×
1351
                                    // month
1352
                                    variableTypeFinal[i]=-1;
×
1353
                                    datum = SPSSConstants.MONTH_LIST.get(Integer.valueOf(datum)-1);
×
1354
                                }
1355
                            }
1356

1357
                            // since value is not missing, set both values to be the same
1358
                            datumForTabFile = datum;
×
1359

1360
                            // decimal-point check (variable is integer or not)
1361
                            if (variableTypeFinal[i]==0){
×
1362
                                if (datum.contains(".")){
×
1363
                                    variableTypeFinal[i] = 1;
×
1364
                                    decimalVariableSet.add(i);
×
1365
                                }
1366
                            }
1367
                        }
1368

1369
                        if (datumDateFormat != null) {
×
1370
                            dateFormatList[i] = datumDateFormat;
×
1371
                        }
1372
                        casewiseRecordForTabFile[i]= datumForTabFile;
×
1373

1374
                    } // end: if: string vs numeric variable
1375

1376
                } // end:for-loop-i (variable-wise loop)
1377

1378

1379
                // print the i-th case; use casewiseRecord to dump the current case to the tab-delimited file
1380
                pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
×
1381

1382
            } // end: while-block
×
1383
        } finally {
1384
            // close the print writer
1385
            if (pwout != null) {
×
1386
                pwout.close();
×
1387
            }
1388
        }
1389

1390
        ///smd.setDecimalVariables(decimalVariableSet);
1391
        dataTable.setCaseQuantity(new Long(caseQnty));
×
1392

1393
        dbgLog.fine("decodeData(): end");
×
1394
    }
×
1395
    
1396
    
1397
    private void processMissingValueData(){
1398
        /*
1399

1400
         POR's missing-value storage differs form the counterpart of SAV;
1401
         this method transforms the POR-native storage to the SAV-type
1402
         after this process, missingValueTable contains point-type
1403
         missing values for later catStat/sumStat processing;
1404
         range and mixed type cases are stored in invalidDataTable
1405

1406
         missingValueCodeTable=
1407
            {VAR1=[9], VAR2=[A], VAR3=[9, 8], VAR4=[A, 8],
1408
             VAR5=[8, 8, 8], VAR6=[B], VAR7=[B, 8]}
1409

1410
         missingValueTable=
1411
            {VAR1=[-1], VAR2=[-1], VAR3=[-2, -1], VAR4=[-1, -2],
1412
             VAR5=[-1, -2, -3], VAR6=[-2, -1], VAR7=[-3, -2, -1]}
1413

1414

1415
         missingValueTable={VAR1=[], VAR2=[], VAR3=[-1], VAR4=[-2],
1416
             VAR5=[-1, -2, -3], VAR6=[], VAR7=[-2]}
1417

1418
         */
1419

1420
        dbgLog.fine("missingValueCodeTable="+missingValueCodeTable);
×
1421
        Set<Map.Entry<String,List<String>>> msvlc = missingValueCodeTable.entrySet();
×
1422
        for (Map.Entry<String, List<String>> et : msvlc) {
×
1423
            String variable = et.getKey();
×
1424
            dbgLog.fine("variable="+variable);
×
1425
            List<String> codeList = et.getValue();
×
1426
            List<String> valueList = missingValueTable.get(variable);
×
1427
            dbgLog.fine("codeList="+codeList);
×
1428
            dbgLog.fine("valueList="+valueList);
×
1429
            int type;
1430
            InvalidData invalidDataInfo = null;
×
1431
            if (valueList.size() == 3){
×
1432
                if (codeList.get(0).equals("8") && codeList.get(1).equals("8") &&
×
1433
                        codeList.get(2).equals("8") ){
×
1434
                    type = 3;
×
1435
                    invalidDataInfo = new InvalidData(type);
×
1436
                    invalidDataInfo.setInvalidValues(valueList);
×
1437
                } else if (codeList.get(0).equals("9") && codeList.get(1).equals("8")){
×
1438
                    type = -3;
×
1439

1440
                    invalidDataInfo = new InvalidData(type);
×
1441
                    invalidDataInfo.setInvalidValues(valueList.subList(2, 3));
×
1442
                    invalidDataInfo.setInvalidRange(valueList.subList(0, 2));
×
1443

1444
                } else if (codeList.get(0).equals("A") && codeList.get(1).equals("8")){
×
1445
                    type = -3;
×
1446
                    invalidDataInfo = new InvalidData(type);
×
1447
                    invalidDataInfo.setInvalidValues(valueList.subList(2, 3));
×
1448
                    invalidDataInfo.setInvalidRange(valueList.subList(0, 2));
×
1449
                } else if (codeList.get(0).equals("B") && codeList.get(1).equals("8")){
×
1450
                    type = -3;
×
1451
                    invalidDataInfo = new InvalidData(type);
×
1452
                    invalidDataInfo.setInvalidValues(valueList.subList(2, 3));
×
1453
                    invalidDataInfo.setInvalidRange(valueList.subList(0, 2));
×
1454
                } else {
1455
                   dbgLog.severe("unkown missing-value combination(3 values)");
×
1456
                }
1457
                
1458
            } else if (valueList.size() == 2){
×
1459
                if (codeList.get(0).equals("8") && codeList.get(1).equals("8")){
×
1460
                    type = 2;
×
1461
                    invalidDataInfo = new InvalidData(type);
×
1462
                    invalidDataInfo.setInvalidValues(valueList);
×
1463

1464
                } else if (codeList.get(0).equals("9")){
×
1465
                    type = -2;
×
1466
                    invalidDataInfo = new InvalidData(type);
×
1467
                    invalidDataInfo.setInvalidRange(valueList.subList(0, 2));
×
1468

1469
                } else if (codeList.get(0).equals("A")){
×
1470
                    type = -2;
×
1471
                    invalidDataInfo = new InvalidData(type);
×
1472
                    invalidDataInfo.setInvalidRange(valueList.subList(0, 2));
×
1473
                } else if (codeList.get(0).equals("B")){
×
1474
                    type = -2;
×
1475
                    invalidDataInfo = new InvalidData(type);
×
1476
                    invalidDataInfo.setInvalidRange(valueList.subList(0, 2));
×
1477

1478
                } else {
1479
                    dbgLog.severe("unknown missing value combination(2 values)");
×
1480
                }
1481
            } else if (valueList.size() == 1){
×
1482
                if (codeList.get(0).equals("8")){
×
1483
                    type = 1;
×
1484
                    invalidDataInfo = new InvalidData(type);
×
1485
                    invalidDataInfo.setInvalidValues(valueList);
×
1486
                } else {
1487
                    dbgLog.severe("unknown missing value combination(2 values)");
×
1488
                }
1489
            }
1490
            invalidDataTable.put(variable, invalidDataInfo);
×
1491
        }
×
1492

1493
        dbgLog.fine("invalidDataTable="+invalidDataTable);
×
1494

1495

1496
        Set<Map.Entry<String,List<String>>> msvl = missingValueTable.entrySet();
×
1497
        for (Map.Entry<String, List<String>> et : msvl) {
×
1498
            String variable = et.getKey();
×
1499
            List<String> valueList = et.getValue();
×
1500

1501
            List<String> codeList = missingValueCodeTable.get(variable);
×
1502

1503
            dbgLog.finer("var="+variable+"\tvalue="+valueList+"\t code"+ codeList);
×
1504
            List<String> temp = new ArrayList<>();
×
1505
            for (int j=0; j<codeList.size(); j++){
×
1506
                if (codeList.get(j).equals("8")){
×
1507
                  temp.add(valueList.get(j));
×
1508
                }
1509
            }
1510
            missingValueTable.put(variable, temp);
×
1511
        }
×
1512
        dbgLog.fine("missingValueTable="+missingValueTable);
×
1513
    }
×
1514
    
1515
    
1516
    
1517
    // utility methods  -----------------------------------------------------//
1518
    
1519
    private int parseNumericField(BufferedReader reader) throws IOException{
1520
        String temp = null;
×
1521
        char[] tmp = new char[1];
×
1522
        StringBuilder sb = new StringBuilder();
×
1523
        while(reader.read(tmp) > 0 ){
×
1524
            temp = Character.toString(tmp[0]);//new String(tmp);
×
1525
            if (temp.equals("/")){
×
1526
                break;
×
1527
            } else {
1528
                sb.append(temp);
×
1529
            }
1530
            //temp = sb.toString();//new String(tmp);
1531
        }
1532
        String base30numberString = sb.toString();
×
1533
        dbgLog.finer("base30numberString="+base30numberString);
×
1534
        int base10equivalent = Integer.valueOf(base30numberString, 30);
×
1535
        dbgLog.finer("base10equivalent="+base10equivalent);
×
1536
        return base10equivalent;
×
1537
    }
1538

1539

1540
    private String parseStringField(BufferedReader reader) throws IOException{
1541
        String temp = null;
×
1542
        char[] tmp = new char[1];
×
1543
        StringBuilder sb = new StringBuilder();
×
1544
        while(reader.read(tmp) > 0 ){
×
1545
            temp = Character.toString(tmp[0]);//new String(tmp);
×
1546
            if (temp.equals("/")){
×
1547
                break;
×
1548
            } else {
1549
                sb.append(temp);
×
1550
            }
1551
            //temp = sb.toString();//new String(tmp);
1552
        }
1553
        String base30numberString = sb.toString();
×
1554
        //dbgLog.fine("base30numberString="+base30numberString);
1555
        int base10equivalent = Integer.valueOf(base30numberString, 30);
×
1556
        //dbgLog.fine("base10equivalent="+base10equivalent);
1557
        char[] stringBody = new char[base10equivalent];
×
1558
        reader.read(stringBody);
×
1559
        String stringData = new String(stringBody);
×
1560
        dbgLog.finer("stringData="+stringData);
×
1561
        return stringData;
×
1562
    }
1563

1564

1565

1566
    private String getNumericFieldAsRawString(BufferedReader reader) throws IOException{
1567
        String temp = null;
×
1568
        char[] tmp = new char[1];
×
1569
        StringBuilder sb = new StringBuilder();
×
1570
        while(reader.read(tmp) > 0 ){
×
1571
            temp = Character.toString(tmp[0]);//new String(tmp);
×
1572
            if (temp.equals("/")){
×
1573
                break;
×
1574
            } else {
1575
                sb.append(temp);
×
1576
            }
1577
            //temp = sb.toString();//new String(tmp);
1578
        }
1579
        String base30numberString = sb.toString();
×
1580
        dbgLog.finer("base30numberString="+base30numberString);
×
1581

1582
        return base30numberString;
×
1583
    }
1584

1585

1586
    private double base30Tobase10Conversion(String base30String){
1587

1588
        // new base(radix) number
1589
        int oldBase = 30;
×
1590
        //dbgLog.fine("base30String="+base30String);
1591

1592
        // trim white-spaces from the both ends
1593
        String base30StringClean = StringUtils.trim(base30String);
×
1594
        //dbgLog.fine("base30StringClean="+base30StringClean);
1595

1596
        // check the negative/positive sign
1597
        boolean isNegativeNumber = false;
×
1598
        boolean hasPositiveSign = false;
×
1599
        if (base30StringClean.startsWith("-")){
×
1600
            isNegativeNumber = true;
×
1601
        }
1602

1603
        if (base30StringClean.startsWith("+")){
×
1604
            hasPositiveSign = true;
×
1605
        }
1606

1607
        // remove the sign if exits
1608
        String base30StringNoSign = null;
×
1609

1610
        if ((isNegativeNumber) ||(hasPositiveSign)){
×
1611
            base30StringNoSign = base30StringClean.substring(1);
×
1612
        } else {
1613
            base30StringNoSign = base30StringClean;
×
1614
        }
1615

1616
        // check the scientific notation
1617
        // if so, divide it into the significand and exponent
1618
        String significand  = null;
×
1619
        long exponent = 0;
×
1620

1621
        int plusIndex = base30StringNoSign.indexOf("+");
×
1622
        int minusIndex = base30StringNoSign.indexOf("-");
×
1623

1624
        if (plusIndex> 0){
×
1625
            significand = base30StringNoSign.substring(0, plusIndex);
×
1626
            exponent = Long.valueOf( base30StringNoSign.substring(plusIndex+1), oldBase );
×
1627

1628
        } else if (minusIndex > 0){
×
1629
            significand = base30StringNoSign.substring(0, minusIndex);
×
1630
            exponent = -1 * Long.valueOf( base30StringNoSign.substring(minusIndex+1), oldBase );
×
1631

1632
        } else {
1633
            significand = base30StringNoSign;
×
1634
        }
1635

1636

1637
        // "move" decimal point; for each shift right, subtract one from exponent; end result is a string with no decimal
1638
        int decimalIndex = significand.indexOf(".");
×
1639
        if (decimalIndex != -1) {
×
1640
            exponent -= (significand.length() - (decimalIndex + 1) );
×
1641
            significand = significand.substring(0, decimalIndex) + significand.substring( decimalIndex + 1 );
×
1642
        }
1643

1644
        // TODO: Verify that the MathContext/Rounding methods are OK:
1645
        // -- L.A. 4.0 beta
1646
        MathContext mc = new MathContext(15,RoundingMode.HALF_UP);
×
1647
        long base10Significand = Long.parseLong(significand, oldBase);
×
1648
        BigDecimal base10value = new BigDecimal( String.valueOf(base10Significand), mc );
×
1649
        BigDecimal exponentialComponent = new BigDecimal("1", mc);
×
1650

1651
        for (int g=0; g < Math.abs(exponent); g++) {
×
1652
            exponentialComponent = exponentialComponent.multiply(new BigDecimal("30", mc));
×
1653
        }
1654

1655
        if (exponent >= 0) {
×
1656
            base10value = base10value.multiply(exponentialComponent, mc);
×
1657
        } else {
1658
            base10value = base10value.divide(exponentialComponent, mc);
×
1659
        }
1660

1661
        // negative sign if applicable
1662
        if (isNegativeNumber){
×
1663
            base10value = base10value.multiply(new BigDecimal("-1", mc));
×
1664
        }
1665

1666
        return base10value.doubleValue();
×
1667
    }
1668
    
1669
    void assignValueLabels(Map<String, Map<String, String>> valueLabelTable) {
1670
        // Let's go through all the categorical value label mappings and 
1671
        // assign them to the correct variables: 
1672
        
1673
        for (DataVariable dataVariable : dataTable.getDataVariables()) {
×
1674
            String varName = dataVariable.getName();
×
1675
            
1676
            Map<String, String> valueLabelPairs = valueLabelTable.get(valueVariableMappingTable.get(varName));
×
1677
            if (valueLabelPairs != null && !valueLabelPairs.isEmpty()) {
×
1678
                for (String value : valueLabelPairs.keySet()) {
×
1679
                    
1680
                    VariableCategory cat = new VariableCategory();
×
1681
                    cat.setValue(value);
×
1682
                    cat.setLabel(valueLabelPairs.get(value));
×
1683

1684
                    /* cross-link the variable and category to each other: */
1685
                    cat.setDataVariable(dataVariable);
×
1686
                    dataVariable.getCategories().add(cat);
×
1687
                }
×
1688
            }
1689
        }
×
1690
    }
×
1691
    
1692
    private void print2Darray(Object[][] datatable, String title){
1693
        dbgLog.fine(title);
×
1694
        for (Object[] datatable1 : datatable) {
×
1695
            dbgLog.fine(StringUtils.join(datatable1, "|"));
×
1696
        }
1697
    }    
×
1698
    
1699
    private Map<String,String> createLabelMap (File extendedLabelsFile) {
1700
        Map<String,String> varLabelMap = new HashMap<>();
×
1701

1702
        // Simply open the text file supplied, and read the variable-lable                                                                   
1703
        // pairs supplied:                                                                                                                   
1704

1705
        BufferedReader labelsFileReader = null;
×
1706

1707
        try {
1708
            labelsFileReader = new BufferedReader(new InputStreamReader(new FileInputStream(extendedLabelsFile)));
×
1709

1710
            String inLine;
1711

1712
            while ((inLine = labelsFileReader.readLine() ) != null) {
×
1713
                String[] valueTokens = inLine.split("\t", 2);
×
1714

1715
                if (valueTokens[0] != null && !"".equals(valueTokens[0]) &&
×
1716
                    valueTokens[1] != null && !"".equals(valueTokens[1])) {
×
1717

1718
                    valueTokens[1] = valueTokens[1].replaceAll("[\n\r]", "");
×
1719
                    // A very temporary fix for the varstr limit in the database!
1720
                    // -- L.A. 4.0 beta 11
1721
                    // TODO: change the label field to "text" by beta 12!!
1722
                    if (valueTokens[1].length() > 255) {
×
1723
                        valueTokens[1] = valueTokens[1].substring(0, 255);
×
1724
                    }
1725
                    varLabelMap.put(valueTokens[0], valueTokens[1]);
×
1726
                }
1727
            }
×
1728

1729
        } catch (java.io.FileNotFoundException fnfex) {
×
1730
            dbgLog.warning("Ingest: could not open Extended Labels file");
×
1731
            dbgLog.warning(fnfex.getMessage());
×
1732
            return null;
×
1733
        } catch (IOException ioex) {
×
1734
            dbgLog.warning("Ingest: caught exception trying to process Labels File");
×
1735
            dbgLog.warning(ioex.getMessage());
×
1736
            return null;
×
1737
        } finally {
1738
            if (labelsFileReader != null) {
×
1739
                try {labelsFileReader.close();}catch(Exception x){};
×
1740
            }
1741
        }
1742

1743
        return varLabelMap;
×
1744
    }
1745

1746
    
1747
}
1748

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc