• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IQSS / dataverse / #22985

23 Aug 2024 06:32PM CUT coverage: 20.61% (-0.2%) from 20.791%
#22985

Pull #10781

github

landreev
added an upfront locks check to the /addGlobusFiles api #10623
Pull Request #10781: Improved handling of Globus uploads

4 of 417 new or added lines in 15 files covered. (0.96%)

4194 existing lines in 35 files now uncovered.

17388 of 84365 relevant lines covered (20.61%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java
1
/*
2
   Copyright (C) 2005-2013, by the President and Fellows of Harvard College.
3

4
   Licensed under the Apache License, Version 2.0 (the "License");
5
   you may not use this file except in compliance with the License.
6
   You may obtain a copy of the License at
7

8
         http://www.apache.org/licenses/LICENSE-2.0
9

10
   Unless required by applicable law or agreed to in writing, software
11
   distributed under the License is distributed on an "AS IS" BASIS,
12
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
   See the License for the specific language governing permissions and
14
   limitations under the License.
15

16
   Dataverse Network - A web application to share, preserve and analyze research data.
17
   Developed at the Institute for Quantitative Social Science, Harvard University.
18
   Version 3.0.
19
*/
20
package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.rdata;
21

22

23
import java.io.*;
24
import java.io.InputStreamReader;
25
import java.text.*;
26
import java.util.logging.*;
27
import java.util.*;
28

29
import jakarta.inject.Inject;
30

31
// Rosuda Wrappers and Methods for R-calls to Rserve
32
import edu.harvard.iq.dataverse.settings.JvmSettings;
33
import org.rosuda.REngine.REXP;
34
import org.rosuda.REngine.REXPMismatchException;
35
import org.rosuda.REngine.RList;
36
import org.rosuda.REngine.Rserve.RFileInputStream;
37
import org.rosuda.REngine.Rserve.RFileOutputStream;
38
import org.rosuda.REngine.Rserve.*;
39

40
import edu.harvard.iq.dataverse.DataTable;
41
import edu.harvard.iq.dataverse.datavariable.DataVariable;
42
import edu.harvard.iq.dataverse.datavariable.VariableCategory;
43

44
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader;
45
import edu.harvard.iq.dataverse.ingest.tabulardata.spi.TabularDataFileReaderSpi;
46
import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest;
47
import edu.harvard.iq.dataverse.rserve.*;
48

49

50
import org.apache.commons.lang3.RandomStringUtils;
51

52
/**
53
 * Dataverse 4.0 implementation of <code>TabularDataFileReader</code> for the 
54
 * RData Binary Format.
55
 * 
56
 * Based on the original implementation for DVN v3.*, by Matt Owen (2012-2013),
57
 * completed by Leonid Andreev in 2013. 
58
 * 
59
 * This version is a serious re-write of the plugin, using the new 4.0 
60
 * ingest plugin architecture. 
61
 * 
62
 * original 
63
 * @author Matthew Owen
64
 * @author Leonid Andreev
65
 
66
 * This implementation uses external R-Scripts to do the bulk of the processing.
67
 */
68
public class RDATAFileReader extends TabularDataFileReader {
69
    
70
// Date-time things
UNCOV
71
  public static final String[] FORMATS = { "other", "date", "date-time", "date-time-timezone" };
×
72

73
  // R-ingest recognition files
74
  private static final String[] FORMAT_NAMES = { "RDATA", "Rdata", "rdata" };
×
75
  private static final String[] EXTENSIONS = { "Rdata", "rdata" };
×
UNCOV
76
  private static final String[] MIME_TYPE = { "application/x-rlang-transport" };
×
77
  
78
  // R Scripts
79
  static private String RSCRIPT_CREATE_WORKSPACE = "";
×
80
  static private String RSCRIPT_DATASET_INFO_SCRIPT = "";
×
81
  static private String RSCRIPT_GET_DATASET = "";
×
82
  static private String RSCRIPT_GET_LABELS = "";
×
UNCOV
83
  static private String RSCRIPT_WRITE_DVN_TABLE = "";
×
84
  
85
  // RServe static variables
86
  private final String RSERVE_HOST;
87
  private final int RSERVE_PORT;
88
  private final String RSERVE_USER;
89
  private final String RSERVE_PASSWORD;
90
  
91
  // TODO: 
92
  // we're not using these time/data formats for anything, are we?
93
  // DATE FORMATS
UNCOV
94
  private static SimpleDateFormat[] DATE_FORMATS = new SimpleDateFormat[] {
×
95
    new SimpleDateFormat("yyyy-MM-dd")
96
  };
97
  
98
  // TIME FORMATS
UNCOV
99
  private static SimpleDateFormat[] TIME_FORMATS = new SimpleDateFormat[] {
×
100
    // Date-time up to milliseconds with timezone, e.g. 2013-04-08 13:14:23.102 -0500
101
    new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS z"),
102
    // Date-time up to milliseconds, e.g. 2013-04-08 13:14:23.102
103
    new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"),
104
    // Date-time up to seconds with timezone, e.g. 2013-04-08 13:14:23 -0500
105
    new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z"),
106
    // Date-time up to seconds and no timezone, e.g. 2013-04-08 13:14:23
107
    new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
108
  };
109
  
110
  // Logger
UNCOV
111
  private static final Logger LOG = Logger.getLogger(RDATAFileReader.class.getPackage().getName());
×
112

113
 
114
  TabularDataIngest ingesteddata = new TabularDataIngest();
×
UNCOV
115
  private DataTable dataTable = new DataTable();
×
116
    
117
  // Process ID, used partially in the generation of temporary directories
118
  private String mPID;
119
  
120
  // Object containing all the informatin for an R-workspace (including
121
  // temporary directories on and off server)
122
  private RWorkspace mRWorkspace;
123
  
124
  
125

126
  // Number formatter
UNCOV
127
  NumberFormat doubleNumberFormatter = new DecimalFormat();
×
128

129
  // Builds R Requests for an R-server
130
  private RRequestBuilder mRequestBuilder;
131
  /*
   * Static initialization.
   * Reads each bundled R script into its RSCRIPT_* string so that it can later
   * be sent to Rserve, then dumps the loaded scripts to the log.
   */
  static {
    RSCRIPT_WRITE_DVN_TABLE = readLocalResource("scripts/write.table.R");
    RSCRIPT_GET_DATASET = readLocalResource("scripts/get.dataset.R");
    RSCRIPT_CREATE_WORKSPACE = readLocalResource("scripts/create.workspace.R");
    RSCRIPT_GET_LABELS = readLocalResource("scripts/get.labels.R");
    RSCRIPT_DATASET_INFO_SCRIPT = readLocalResource("scripts/dataset.info.script.R");

    // Everything is logged at FINER except the workspace-creation script, which is logged at FINE.
    LOG.log(Level.FINER, "R SCRIPTS AS STRINGS --------------");
    LOG.log(Level.FINER, RSCRIPT_WRITE_DVN_TABLE);
    LOG.log(Level.FINER, RSCRIPT_GET_DATASET);
    LOG.log(Level.FINE, RSCRIPT_CREATE_WORKSPACE);
    LOG.log(Level.FINER, RSCRIPT_GET_LABELS);
    LOG.log(Level.FINER, RSCRIPT_DATASET_INFO_SCRIPT);
    LOG.log(Level.FINER, "END OF R SCRIPTS AS STRINGS -------");
  }
×
152
  
153
  /* 
154
   * TODO: 
155
   * Switch to the implementation in iq.dataverse.rserve
156
   * -- L.A. 4.0 alpha 1
157
  */
158
  private class RWorkspace {
159
    public String mParent, mWeb, mDvn, mDsb;
160
    public File mDataFile, mCsvDataFile;
161
    public RRequest mRRequest;
162
    public BufferedInputStream mInStream;
163
    /**
164
     * 
165
     */
166
    public RWorkspace () {
×
167
      mParent = mWeb = mDvn = mDsb = "";
×
168
      mDataFile = null;
×
169
      mCsvDataFile = null;
×
170
      mInStream = null;
×
UNCOV
171
    }
×
172
    /**
173
     * Create the Actual R Workspace
174
     */
175
    public void create () {
176
      try {
177
        LOG.fine("RDATAFileReader: Creating R Workspace");
×
178
        RRequestBuilder scriptBuilder = mRequestBuilder.script(RSCRIPT_CREATE_WORKSPACE);
×
UNCOV
179
        LOG.fine("got a sript request builder");
×
180
        
181
        RRequest scriptRequest = scriptBuilder.build();
×
UNCOV
182
        LOG.fine("script request built.");
×
183
        
184
        /*
185
        REXP result = mRequestBuilder
186
                .script(RSCRIPT_CREATE_WORKSPACE)
187
                .build()
188
                .eval();
189
        */
UNCOV
190
        REXP result = scriptRequest.eval(); 
×
191
        
UNCOV
192
        LOG.fine("evaluated the script");
×
193
        
UNCOV
194
        RList directoryNames = result.asList();
×
195
        
UNCOV
196
        mParent = null; 
×
197
        
198
        if (directoryNames != null) {
×
199
            if (directoryNames.at("parent") != null) {
×
UNCOV
200
                mParent = directoryNames.at("parent").asString();
×
201
            } else {
202
                LOG.fine("WARNING: directoryNames at \"parent\" is null!");
×
203
                if(directoryNames.isEmpty()) {
×
UNCOV
204
                    LOG.fine("WARNING: directoryNames is empty!");
×
205
                } else {
206
                    Set<String> dirKeySet = directoryNames.keySet();
×
UNCOV
207
                    Iterator iter = dirKeySet.iterator();
×
208
                    String key;
209

210
                    while (iter.hasNext()) {
×
211
                        key = (String) iter.next();
×
UNCOV
212
                        LOG.fine("directoryNames list key: "+key);
×
213
                    }
UNCOV
214
                }
×
215
            }
216
            
217
        } else {
UNCOV
218
            LOG.fine("WARNING: directoryNames is null!");
×
219
        }
220
        
UNCOV
221
        LOG.fine(String.format("RDATAFileReader: Parent directory of R Workspace is %s", mParent));
×
222
        
UNCOV
223
        LOG.fine("RDATAFileReader: Creating file handle");
×
224
        
UNCOV
225
        mDataFile = new File(mParent, "data.Rdata");
×
226
      }
227
      catch (Exception E) {
×
228
        LOG.warning("RDATAFileReader: Could not create R workspace");
×
229
        mParent = mWeb = mDvn = mDsb = "";
×
230
      }
×
UNCOV
231
    }
×
232
    /**
233
     * Destroy the Actual R Workspace
234
     */
235
    public void destroy () {
236
      String destroyerScript = new StringBuilder("")
×
237
              .append(String.format("unlink(\"%s\", TRUE, TRUE)", mParent))
×
UNCOV
238
              .toString();
×
239
      
240
      try {
UNCOV
241
        LOG.fine("RDATAFileReader: Destroying R Workspace");
×
242

243
        mRRequest = mRequestBuilder
×
244
                .script(destroyerScript)
×
UNCOV
245
                .build();
×
246
        
UNCOV
247
        mRRequest.eval();
×
248
        
UNCOV
249
        LOG.fine("RDATAFileReader: DESTROYED R Workspace");
×
250
      }
251
      catch (Exception ex) {
×
252
        LOG.warning("RDATAFileReader: R Workspace was not destroyed");
×
253
        LOG.fine(ex.getMessage());
×
254
      }
×
UNCOV
255
    }
×
256
    /**
257
     * Create the Data File to Use for Analysis, etc.
258
     */
259
    public File dataFile (String target, String prefix, int size) {
260
      
UNCOV
261
      String fileName = String.format("DVN.dataframe.%s.Rdata", mPID);
×
262
      
UNCOV
263
      mDataFile = new File(mParent, fileName);
×
264
                
265
      RFileInputStream RInStream = null;
×
UNCOV
266
      OutputStream outStream = null;
×
267
      
UNCOV
268
      RRequest req = mRequestBuilder.build();
×
269
      
270
      try {
271
        outStream = new BufferedOutputStream(new FileOutputStream(mDataFile));
×
UNCOV
272
        RInStream = req.getRConnection().openFile(target);
×
273
        
274
        if (size < 1024*1024*500) {
×
275
          int bufferSize = size;
×
276
          byte [] outputBuffer = new byte[bufferSize];
×
277
          RInStream.read(outputBuffer);
×
UNCOV
278
          outStream.write(outputBuffer, 0, size);
×
279
        }
280
        
281
        RInStream.close();
×
282
        outStream.close();
×
UNCOV
283
        return mDataFile;
×
284
      }
285
      catch (FileNotFoundException exc) {
×
286
        exc.printStackTrace();
×
287
        LOG.warning("RDATAFileReader: FileNotFound exception occurred");
×
UNCOV
288
        return mDataFile;
×
289
      }
290
      catch (IOException exc) {
×
291
        exc.printStackTrace();
×
UNCOV
292
        LOG.warning("RDATAFileReader: IO exception occurred");
×
293
      }
294

295
      // Close R input data stream
UNCOV
296
      if (RInStream != null) {
×
297
        try {
UNCOV
298
          RInStream.close();
×
299
        }
300
        catch (IOException exc) {
×
UNCOV
301
        }
×
302
      }
303

304
      // Close output data stream
UNCOV
305
      if (outStream != null) {
×
306
        try {
UNCOV
307
          outStream.close();
×
308
        }
309
        catch (IOException ex) {
×
UNCOV
310
        }
×
311
      }
312
      
UNCOV
313
      return mDataFile;
×
314
    }
315
    /**
316
     * Set the stream
317
     * @param inStream 
318
     */
319
    public void stream (BufferedInputStream inStream) {
320
      mInStream = inStream;
×
UNCOV
321
    }
×
322
    /**
323
     * Save the Rdata File Temporarily
324
     */
325
    private File saveRdataFile () {
UNCOV
326
      LOG.fine("RDATAFileReader: Saving Rdata File from Input Stream");
×
327
      
328
      if (mInStream == null) {
×
329
        LOG.fine("RDATAFileReader: No input stream was specified. Not writing file and returning NULL");
×
UNCOV
330
        return null;
×
331
      }
332
      
333
      byte [] buffer = new byte [1024];
×
334
      int bytesRead = 0;
×
335
      RFileOutputStream outStream = null;
×
UNCOV
336
      RConnection rServerConnection = null;
×
337
      
338
      try {
339
        LOG.fine("RDATAFileReader: Opening R connection");
×
UNCOV
340
        rServerConnection = new RConnection(RSERVE_HOST, RSERVE_PORT);
×
341
        
342
        LOG.fine("RDATAFileReader: Logging into R connection");
×
UNCOV
343
        rServerConnection.login(RSERVE_USER, RSERVE_PASSWORD);
×
344
        
345
        LOG.fine("RDATAFileReader: Attempting to create file");
×
UNCOV
346
        outStream = rServerConnection.createFile(mDataFile.getAbsolutePath());
×
347
        
UNCOV
348
        LOG.fine(String.format("RDATAFileReader: File created on server at %s", mDataFile.getAbsolutePath()));
×
349
      }
350
      catch (IOException ex) {
×
UNCOV
351
        LOG.warning("RDATAFileReader: Could not create file on R Server");
×
352
      }
353
      catch (RserveException ex) {
×
354
        LOG.warning("RDATAFileReader: Could not connect to R Server");
×
UNCOV
355
      }
×
356
      
357
      /*
358
       * Read stream and write to destination file
359
       */
360
      try {
361
        // Read from local file and write to rserver 1kb at a time
362
        while (mInStream.read(buffer) != -1) {
×
363
          outStream.write(buffer);
×
UNCOV
364
          bytesRead++;
×
365
        }
366
      }
367
      catch (IOException ex) {
×
368
        LOG.warning("RDATAFileReader: Could not write to file");
×
UNCOV
369
        LOG.fine(String.format("Error message: %s", ex.getMessage()));
×
370
      }
371
      catch (NullPointerException ex) {
×
372
        LOG.warning("RDATAFileReader: Data file has not been specified");
×
UNCOV
373
      }
×
374
      
375
      // Closing R server connection
376
      if (rServerConnection != null) {
×
377
        LOG.fine("RDATAFileReader: Closing R server connection");
×
UNCOV
378
        rServerConnection.close();
×
379
      }
380
      
UNCOV
381
      return mDataFile;
×
382
    }
383
    private File saveCsvFile () {
384
      // Specify CSV File Location on Server
UNCOV
385
      mCsvDataFile = new File(mRWorkspace.getRdataFile().getParent(), "data.csv");
×
386

387
      // 
388
      String csvScript = new StringBuilder("")
×
389
        .append("options(digits.secs=3)")
×
390
        .append("\n")
×
391
        .append(RSCRIPT_WRITE_DVN_TABLE)
×
392
        .append("\n")
×
393
        .append(String.format("load(\"%s\")", mRWorkspace.getRdataAbsolutePath()))
×
394
        .append("\n")
×
395
        .append(RSCRIPT_GET_DATASET)
×
396
        .append("\n")
×
397
        .append(String.format("write.dvn.table(data.set, file=\"%s\")", mCsvDataFile.getAbsolutePath()))
×
UNCOV
398
        .toString();
×
399
      
400
      // 
UNCOV
401
      RRequest csvRequest = mRequestBuilder.build();
×
402
      
403
      LOG.fine(String.format("RDATAFileReader: Attempting to write table to `%s`", mCsvDataFile.getAbsolutePath()));
×
UNCOV
404
      csvRequest.script(csvScript).eval();
×
405

UNCOV
406
      return mCsvDataFile;
×
407
    }
408
    /**
409
     * Return Rdata File Handle on R Server
410
     * @return File asdasd 
411
     */
412
    public File getRdataFile () {
UNCOV
413
      return mDataFile;
×
414
    }
415
    /**
416
     * Return Location of Rdata File on R Server
417
     * @return the file location as a string on the (potentially) remote R server
418
     */
419
    public String getRdataAbsolutePath () {
UNCOV
420
      return mDataFile.getAbsolutePath();
×
421
    }
422
  }
423
  /**
   * Constructs a <code>RDATAFileReader</code> instance from its "Spi" Class
   * @param originator a <code>TabularDataFileReaderSpi</code> object.
   */
  public RDATAFileReader(TabularDataFileReaderSpi originator) {

    super(originator);

    // These settings have sane defaults in resources/META-INF/microprofile-config.properties,
    // ready to be overridden by a sysadmin. Every time a file would be read with this file reader,
    // a new reader will be created, reading from the cached config source settings with minimal overhead.
    this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup();
    this.RSERVE_PORT = lookupRservePort();
    this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup();
    this.RSERVE_PASSWORD = JvmSettings.RSERVE_PASSWORD.lookup();

    LOG.fine("RDATAFileReader: INSIDE RDATAFileReader");

    // The request builder serves as the RRequest factory for the rest of this class.
    mRequestBuilder = new RRequestBuilder().host(RSERVE_HOST).port(RSERVE_PORT).user(RSERVE_USER).password(RSERVE_PASSWORD);

    // Workspace descriptor; the workspace itself is created later, in read().
    mRWorkspace = new RWorkspace();

    // Six random digits, used in the names of temporary files.
    mPID = RandomStringUtils.randomNumeric(6);
  }

  /**
   * Looks up the configured Rserve port.
   * @return the configured port, or 6311 when the configured value cannot be parsed
   */
  private static int lookupRservePort() {
    try {
      return JvmSettings.RSERVE_PORT.lookup(Integer.class);
    } catch (IllegalArgumentException e) {
      LOG.log(Level.SEVERE, "Could not parse value for " + JvmSettings.RSERVE_PORT.getScopedKey() + ", defaulting to 6311", e);
      return 6311;
    }
  }
×
461

462
  private void init() throws IOException {
463
    doubleNumberFormatter.setGroupingUsed(false);
×
UNCOV
464
    doubleNumberFormatter.setMaximumFractionDigits(340);
×
465
    
UNCOV
466
  }
×
467
  
468
  /**
469
   * Read the Given RData File
470
   * @param stream a <code>BufferedInputStream</code>.
471
   * @param ignored
472
   * @return an <code>TabularDataIngest</code> object
473
   * @throws java.io.IOException if a reading error occurs.
474
   */
475
    @Override
476
    public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException {
477

UNCOV
478
        init();
×
479

480
        // Create Request object
UNCOV
481
        LOG.fine("RDATAFileReader: Creating RRequest object from RRequestBuilder object");
×
482

483
        try {
484
            // Create R Workspace
485
            mRWorkspace.stream(stream);
×
486
            mRWorkspace.create();
×
487
            mRWorkspace.saveRdataFile();
×
UNCOV
488
            mRWorkspace.saveCsvFile();
×
489

490
            // Copy CSV file to a local, temporary directory
491
            // Additionally, this sets the "tabDelimitedDataFile" property of the FileInformation
UNCOV
492
            File localCsvFile = transferCsvFile(mRWorkspace.mCsvDataFile);
×
493

494
            // Generate and save all the information about data set; this creates all 
495
            // the DataVariable objects, among other things:
UNCOV
496
            getDataFrameInformation();
×
497

498
            // Read and parse the TAB-delimited file saved by R, above; do the 
499
            // necessary post-processinga and filtering, and save the resulting 
500
            // TAB file as tabFileDestination, below. This is the file we'll be 
501
            // using to calculate the UNF, and for the storage/preservation of the
502
            // dataset. 
503
            // IMPORTANT: this must be done *after* the variable metadata has been 
504
            // created!
505
            // - L.A. 
506
            RTabFileParser csvFileReader = new RTabFileParser('\t');
×
UNCOV
507
            BufferedReader localBufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(localCsvFile), "UTF-8"));
×
508

509
            File tabFileDestination = File.createTempFile("data-", ".tab");
×
UNCOV
510
            PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath(), "UTF-8");
×
511
        
UNCOV
512
            int lineCount = csvFileReader.read(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter);
×
513

UNCOV
514
            LOG.fine("RDATAFileReader: successfully read "+lineCount+" lines of tab-delimited data.");
×
515
        
UNCOV
516
            dataTable.setUnf("UNF:pending");
×
517
        
518
            ingesteddata.setTabDelimitedFile(tabFileDestination);
×
UNCOV
519
            ingesteddata.setDataTable(dataTable);
×
520

521
            // Destroy R workspace
522
            mRWorkspace.destroy();
×
523
        } catch (Exception ex) {
×
524
            throw new IOException ("Unknown exception occured during ingest; "+ex.getMessage());
×
UNCOV
525
        }
×
526

UNCOV
527
        LOG.fine("RDATAFileReader: Leaving \"read\" function");
×
528

UNCOV
529
        return ingesteddata;
×
530
    }
531
  
532
  /**
533
   * Copy Remote File on R-server to a Local Target
534
   * @param target a target on the remote r-server
535
   * @return 
536
   */
537
  private File transferCsvFile (File target) {
538
    File destination;
539
    FileOutputStream csvDestinationStream;
540
    
541
    try {
542
      destination = File.createTempFile("data", ".csv");
×
543
      LOG.fine(String.format("RDATAFileReader: Writing local CSV File to `%s`", destination.getAbsolutePath()));
×
UNCOV
544
      csvDestinationStream = new FileOutputStream(destination);
×
545
    }
546
    catch (IOException ex) {
×
547
      LOG.warning("RDATAFileReader: Could not create temporary file!");
×
548
      return null;
×
UNCOV
549
    }
×
550
    
551
    try {
552
      // Open connection to R-serve
553
      RConnection rServeConnection = new RConnection(RSERVE_HOST, RSERVE_PORT);
×
UNCOV
554
      rServeConnection.login(RSERVE_USER, RSERVE_PASSWORD);
×
555
      
556
      // Open file for reading from R-serve
UNCOV
557
      RFileInputStream rServeInputStream = rServeConnection.openFile(target.getAbsolutePath());
×
558
      
559
      int b;
560
      
UNCOV
561
      LOG.fine("RDATAFileReader: Beginning to write to local destination file");
×
562
      
563
      // Read from stream one character at a time
UNCOV
564
      while ((b = rServeInputStream.read()) != -1) {
×
565
        // Write to the *local* destination file
UNCOV
566
        csvDestinationStream.write(b);
×
567
      }
568
      
569
      LOG.fine(String.format("RDATAFileReader: Finished writing from destination `%s`", target.getAbsolutePath()));
×
UNCOV
570
      LOG.fine(String.format("RDATAFileReader: Finished copying to source `%s`", destination.getAbsolutePath()));
×
571
      
572
      
573
      LOG.fine("RDATAFileReader: Closing CSVFileReader R Connection");
×
UNCOV
574
      rServeConnection.close();
×
575
    }
576
    /*
577
     * TO DO: Make this error catching more intelligent
578
     */
579
    catch (Exception ex) {
×
UNCOV
580
    }
×
581
    
UNCOV
582
    return destination;
×
583
  }
584
  
585
  
586
    /**
587
     *
588
     * Runs an R-script that extracts meta-data from the *original* Rdata
589
     * object, then parses its output and creates DataVariable objects.
590
     *
591
     * @throws IOException if something bad happens?
592
     */
593
    private void getDataFrameInformation() {
UNCOV
594
        LOG.fine("RDATAFileReader: Entering `getDataFrameInformation` function");
×
595

596
        // Store variable names
UNCOV
597
        String[] variableNames = {};
×
598

UNCOV
599
        String parentDirectory = mRWorkspace.getRdataFile().getParent();
×
600

601
        String fileInfoScript = new StringBuilder("")
×
602
                .append(String.format("load(\"%s\")\n", mRWorkspace.getRdataAbsolutePath()))
×
603
                .append(String.format("setwd(\"%s\")\n", parentDirectory))
×
604
                .append(RSCRIPT_GET_DATASET)
×
605
                .append("\n")
×
606
                .append(RSCRIPT_DATASET_INFO_SCRIPT)
×
UNCOV
607
                .toString();
×
608

609
        try {
610
            RRequest request = mRequestBuilder.build();
×
611
            request.script(fileInfoScript);
×
UNCOV
612
            RList fileInformation = request.eval().asList();
×
613

UNCOV
614
            RList metaInfo = fileInformation.at("meta.info").asList();
×
615

616
            int varQnty = 0;
×
UNCOV
617
            variableNames = fileInformation.at("varNames").asStrings();
×
618

619
            //mDataTypes = fileInformation.at("dataTypes").asStrings();
620

621
            // Initialize variables: 
UNCOV
622
            List<DataVariable> variableList = new ArrayList<>();
×
623

624
            for (String varName : variableNames) {
×
625
                DataVariable dv = new DataVariable(varQnty, dataTable);
×
626
                dv.setName(varName);
×
UNCOV
627
                dv.setLabel(varName);
×
628
                // TODO:
629
                // Check if variables have real descriptive labels defined, 
630
                // via the mechanismm provided by that special optional package... 
631
                // (?) -- L.A.
UNCOV
632
                variableList.add(dv);
×
633

634
                // variableLabels.put(varName, varName);
635
                // variableNameList.add(varName);
UNCOV
636
                varQnty++;
×
637
            }
638

639
            dataTable.setVarQuantity(new Long(varQnty));
×
UNCOV
640
            dataTable.setDataVariables(variableList);
×
641
        
642
            // Get the Variable Meta Data Table while Populating 
UNCOV
643
            processVariableInfo(metaInfo, dataTable);
×
644
      
645
            
646
            if (fileInformation.at("caseQnty") != null) {
×
UNCOV
647
                int caseQuantity = 0; 
×
648
                try {
649
                    caseQuantity =  fileInformation.at("caseQnty").asInteger();
×
UNCOV
650
                } catch (REXPMismatchException rexp) {
×
651
                    // bummer! - but not fatal. 
652
                }
×
653
                if (caseQuantity > 0) {
×
UNCOV
654
                    dataTable.setCaseQuantity(new Long(caseQuantity));
×
655
                }
656
            }
657
    }
658
    catch (REXPMismatchException ex) {
×
UNCOV
659
      LOG.warning("RDATAFileReader: Could not put information correctly");
×
660
    }
661
    catch (Exception ex) {
×
662
      ex.printStackTrace();
×
663
      LOG.warning(ex.getMessage());
×
UNCOV
664
    }
×
665
    
666
    
UNCOV
667
  }
×
668

669
    /**
670
   * Read a Local Resource and Return Its Contents as a String
671
   * <code>readLocalResource</code> searches the local path around the class
672
   * <code>RDATAFileReader</code> for a file and returns its contents as a
673
   * string.
674
   * @param path String specifying the name of the local file to be converted
675
   * into a UTF-8 string.
676
   * @return a UTF-8 <code>String</code>
677
   */
678
    private static String readLocalResource(String path) {
679
        // Debug
UNCOV
680
        LOG.fine(String.format("RDATAFileReader: readLocalResource: reading local path \"%s\"", path));
×
681

682
        // Get stream
683
        InputStream resourceStream = RDATAFileReader.class.getResourceAsStream(path);
×
UNCOV
684
        String resourceAsString = "";
×
685

686
        // Try opening a buffered reader stream
687
        try {
UNCOV
688
            BufferedReader rd = new BufferedReader(new InputStreamReader(resourceStream, "UTF-8"));
×
689

690
            String line = null;
×
691
            while ((line = rd.readLine()) != null) {
×
UNCOV
692
                resourceAsString = resourceAsString.concat(line + "\n");
×
693
            }
694
            resourceStream.close();
×
695
        } catch (IOException ex) {
×
696
            LOG.warning(String.format("RDATAFileReader: (readLocalResource) resource stream from path \"%s\" was invalid", path));
×
UNCOV
697
        }
×
698

699
        // Return string
UNCOV
700
        return resourceAsString;
×
701
    }
702

703
  
704
    /**
705
     * Get a HashMap matching column number to meta-data used in re-creating R
706
     * Objects
707
     *
708
     * @param metaInfo an "RList" Object containing indices - type, type.string,
709
     * class, levels, and format.
710
     * @param dataTable a dataverse DataTable object
711
     */
712
    private void processVariableInfo(RList metaInfo, DataTable dataTable) throws IOException {
713
        // list(type = 1, type.string = "integer", class = class(values), levels = NULL, format = NULL)
714
        Integer variableType = -1;
×
715
        String variableTypeName = "", variableFormat = "";
×
UNCOV
716
        String[] variableLevels = null;
×
717

718

UNCOV
719
        for (int k = 0; k < metaInfo.size(); k++) {
×
720

721
            try {
722

723
                // Meta-data for a column in the data-set
UNCOV
724
                RList columnMeta = metaInfo.at(k).asList();
×
725

726
                // Extract information from the returned list
727
                variableType = !columnMeta.at("type").isNull() ? columnMeta.at("type").asInteger() : null;
×
728
                variableTypeName = !columnMeta.at("type.string").isNull() ? columnMeta.at("type.string").asString() : null;
×
729
                variableLevels = !columnMeta.at("levels").isNull() ? columnMeta.at("levels").asStrings() : new String[0];
×
UNCOV
730
                variableFormat = !columnMeta.at("format").isNull() ? columnMeta.at("format").asString() : null;
×
731

732
                LOG.fine("variable type: " + variableType);
×
733
                LOG.fine("variable type name: " + variableTypeName);
×
UNCOV
734
                LOG.fine("variable format: " + variableFormat);
×
735

736
                for (String variableLevel : variableLevels) {
×
UNCOV
737
                    LOG.fine("variable level: " + variableLevel);
×
738
                }
739

740
                //dataTable.getDataVariables().get(k).setFormatSchema("RDATA");
741

UNCOV
742
                if (variableTypeName == null || variableTypeName.equals("character") || variableTypeName.equals("other")) {
×
743
                    // This is a String: 
744
                    dataTable.getDataVariables().get(k).setTypeCharacter();
×
UNCOV
745
                    dataTable.getDataVariables().get(k).setIntervalDiscrete();
×
746
                    
747
                } else if (variableTypeName.equals("integer")) {
×
748
                    dataTable.getDataVariables().get(k).setTypeNumeric();
×
UNCOV
749
                    dataTable.getDataVariables().get(k).setIntervalDiscrete();
×
750
                    
751
                } else if (variableTypeName.equals("numeric") || variableTypeName.equals("double")) {
×
752
                    dataTable.getDataVariables().get(k).setTypeNumeric();
×
UNCOV
753
                    dataTable.getDataVariables().get(k).setIntervalContinuous();
×
754
                    
755
                } else if (variableTypeName.startsWith("Date")) {
×
756
                    dataTable.getDataVariables().get(k).setTypeCharacter();
×
757
                    dataTable.getDataVariables().get(k).setIntervalDiscrete();
×
UNCOV
758
                    dataTable.getDataVariables().get(k).setFormat(variableFormat);
×
759
                    
760
                    // instead:
761
                    if (variableTypeName.equals("Date")) {
×
762
                        dataTable.getDataVariables().get(k).setFormatCategory("date");
×
763
                    } else if (variableTypeName.equals("DateTime")) {
×
UNCOV
764
                        dataTable.getDataVariables().get(k).setFormatCategory("time");
×
765
                    }
766
                    
UNCOV
767
                } else if (variableTypeName.equals("factor")) {
×
768
                    
769
                    // All R factors are *string* factors!
770
                    dataTable.getDataVariables().get(k).setTypeCharacter();
×
771
                    dataTable.getDataVariables().get(k).setIntervalDiscrete();
×
UNCOV
772
                    if (variableLevels != null && variableLevels.length > 0) {
×
773
                        // yes, this is a factor, with levels defined.
774
                        LOG.fine("this is a factor.");
×
775
                        dataTable.getDataVariables().get(k).setFactor(true);
×
UNCOV
776
                        boolean ordered = false; 
×
777
                        
778
                        if (variableFormat != null && variableFormat.equals("ordered")) {
×
779
                            LOG.fine("an ordered factor, too");
×
UNCOV
780
                            ordered = true;
×
781
                        }
782
                        
783
                        for (int i = 0; i < variableLevels.length; i++) {
×
784
                            VariableCategory cat = new VariableCategory();
×
UNCOV
785
                            cat.setValue(variableLevels[i]);
×
786
                            // Sadly, R factors don't have descriptive labels;
UNCOV
787
                            cat.setLabel(variableLevels[i]);
×
788
                            
789
                            if (ordered) {
×
UNCOV
790
                                cat.setOrder(i+1);
×
791
                            }
792

793
                            /* cross-link the variable and category to each other: */
794
                            cat.setDataVariable(dataTable.getDataVariables().get(k));
×
UNCOV
795
                            dataTable.getDataVariables().get(k).getCategories().add(cat);
×
796
                        }
797
                        
UNCOV
798
                        dataTable.getDataVariables().get(k).setOrderedCategorical(ordered);
×
799

UNCOV
800
                    }
×
801

802
                } // And finally, a special case for logical variables: 
803
                // For all practical purposes, they are handled as numeric factors
804
                // with 0 and 1 for the values and "FALSE" and "TRUE" for the labels.
805
                // (so this can also be used as an example of ingesting a *numeric* 
806
                // categorical variable - as opposed to *string* categoricals, that
807
                // we turn R factors into - above).
808
                else if ("logical".equals(variableTypeName)) {
×
UNCOV
809
                    dataTable.getDataVariables().get(k).setFormatCategory("Boolean");
×
810
                    
811
                    dataTable.getDataVariables().get(k).setTypeNumeric();
×
UNCOV
812
                    dataTable.getDataVariables().get(k).setIntervalDiscrete();
×
813

814
                    String booleanFactorLabels[] = new String[2];
×
815
                    booleanFactorLabels[0] = "FALSE";
×
UNCOV
816
                    booleanFactorLabels[1] = "TRUE";
×
817

818
                    String booleanFactorValues[] = new String[2];
×
819
                    booleanFactorValues[0] = "0";
×
UNCOV
820
                    booleanFactorValues[1] = "1";
×
821

822
                    for (int i = 0; i < 2; i++) {
×
823
                        VariableCategory cat = new VariableCategory();
×
UNCOV
824
                        cat.setValue(booleanFactorValues[i]);
×
825
                        // Unlike R factors, logical values do get descriptive labels ("FALSE"/"TRUE"):
UNCOV
826
                        cat.setLabel(booleanFactorLabels[i]);
×
827

828
                        /* cross-link the variable and category to each other: */
829
                        cat.setDataVariable(dataTable.getDataVariables().get(k));
×
UNCOV
830
                        dataTable.getDataVariables().get(k).getCategories().add(cat);
×
831
                    }
832
                }
833

834
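                // NOTE(review): stale comment ("Store the meta-data in a hashmap (to return later)") -
                // no hashmap is populated here; the metadata is set directly on the data variables above.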
UNCOV
835
            } catch (REXPMismatchException ex) {
×
836
                // If something went wrong, then it wasn't meant to be for that column.
837
                // And you know what? That's okay.
838
                ex.printStackTrace();
×
839
                LOG.fine(String.format("Could not process variable %d of the data frame.", k));
×
UNCOV
840
            }
×
841
        }
UNCOV
842
    }
×
843
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc