• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pysat / pysatCDF / 3621969202

pending completion
3621969202

push

github

GitHub
Merge pull request #39 from pysat/tst/syntax

2 of 2 new or added lines in 1 file covered. (100.0%)

338 of 372 relevant lines covered (90.86%)

2.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.79
/pysatCDF/_cdf.py
1
from __future__ import print_function
3✔
2
from __future__ import absolute_import
3✔
3
import copy
3✔
4
import numpy as np
3✔
5
import string
3✔
6
import sys
3✔
7

8
import pandas
3✔
9
import pysat
3✔
10

11
from pysatCDF import fortran_cdf
3✔
12

13

14
class CDF(object):
3✔
15
    """Reads data from NASA Common Data Format (CDF) files.
16

17
    pysatCDF presents a Python interface to NASA CDF files.
18
    To provide an easy installation experience the CDF library
19
    is included with the software and should be built
20
    automatically when pysatCDF is installed. In addition
21
    to zVariable support in CDFs, pysatCDF provides
22
    functionality to load CDF data and export it into a
23
    format for pysat integration.
24

25
    pysatCDF provides Fortran calls to the simplest CDF fortran
26
    interface, which is itself mapped from C
27
    code. The pysatCDF Fortran is wrapped up by f2py for Python and
28
    is used by the high level python in pysatCDF.
29
    The routines have been observed to be stable over many
30
    data loads.
31

32
    Note when opening a CDF file with this module all data is
    automatically loaded from disk. The constructor takes only a
    file name and offers no option to exclude specific variables.
35

36
    """
37

38
    def __init__(self, fname):
        """Open a CDF file and read its contents into this object.

        Parameters
        ----------
        fname : str
            Name of the CDF file. A trailing '.cdf' (any letter case) is
            stripped before the name is passed to the Fortran interface.

        Raises
        ------
        IOError
            If the Fortran open call reports a non-zero status.

        """
        # In CDF docs it says don't include .cdf in name
        has_extension = fname[-4:].lower() == '.cdf'
        self.fname = fname[:-4] if has_extension else fname
        status = fortran_cdf.open(self.fname)

        self.data_loaded = False

        # CDF library numeric codes for data types.
        self.cdf_data_types = {
            'real4': 21,
            'float': 44,
            'real8': 22,
            'double': 45,
            'byte': 41,
            'int1': 1,
            'int2': 2,
            'int4': 4,
            'uint1': 11,
            'uint2': 12,
            'uint4': 14,
            'char': 51,
            'uchar': 52,
            'epoch': 31,
            'epoch16': 32,
            'TT2000': 33,
        }

        if status != 0:
            raise IOError(fortran_cdf.statusreporter(status))

        # Each step relies on attributes set by the ones before it:
        # file-level inquiry, attribute info, zVariable info, variable
        # data, and finally the zVariable attribute data.
        self.inquire()
        self._read_all_attribute_info()
        self._read_all_z_variable_info()
        self.load_all_variables()
        self._read_all_z_attribute_data()
88

89
    def __enter__(self):
3✔
90
        return self
3✔
91

92
    def __exit__(self, type, value, tb):
3✔
93
        pass
3✔
94

95
    def __getitem__(self, key):
        """Return CDF variable by name.

        The loaded data, the attribute metadata and the zVariable info
        stored under `key` are bundled into a `chameleon` object.

        """
        values = self.data[key]
        attributes = self.meta[key]
        variable_info = self.z_variable_info[key]
        return chameleon(self.fname, key, values, attributes, variable_info)
99

100
    def inquire(self):
        """Maps to fortran CDF_Inquire.

        Stores the values returned by `fortran_cdf.inquire` on this
        instance as private attributes. Not intended for regular direct
        use by user.

        Raises
        ------
        IOError
            If the returned status is non-zero.

        """

        stats = fortran_cdf.inquire(copy.deepcopy(self.fname))

        # The first element is the status code; the rest are file stats.
        status = stats[0]
        if status != 0:
            raise IOError(fortran_cdf.statusreporter(status))

        # Attribute names in the order the Fortran routine returns them.
        targets = ('_num_dims', '_dim_sizes', '_encoding', '_majority',
                   '_max_rec', '_num_r_vars', '_num_z_vars', '_num_attrs')
        for position, attribute in enumerate(targets, start=1):
            setattr(self, attribute, stats[position])
125

126
    def _read_all_z_variable_info(self):
        """Gets all CDF z-variable information, not data though.

        Maps to calls using var_inquire. Gets information on
        data type, number of elements, number of dimensions, etc.
        Results are stored in `self.z_variable_info` (keyed by stripped
        variable name) and `self.z_variable_names_by_num` (keyed by
        variable number).

        Raises
        ------
        IOError
            If the Fortran call reports a non-zero status.

        """

        self.z_variable_info = {}
        self.z_variable_names_by_num = {}

        # One Fortran call returns the basic stats for every zVariable.
        info = fortran_cdf.z_var_all_inquire(self.fname, self._num_z_vars,
                                             len(self.fname))
        (status, data_types, num_elems, rec_varys, dim_varys, num_dims,
         dim_sizes, rec_nums, var_nums,
         var_names) = [info[position] for position in range(10)]

        if status != 0:
            raise IOError(fortran_cdf.statusreporter(status))

        for i in range(len(data_types)):
            # Only looking at first possible extra dimension. A size of
            # zero is bumped to one.
            first_dim = dim_sizes[i, :1]
            if first_dim[0] == 0:
                first_dim[0] += 1

            padded_name = ''.join(var_names[i].astype('U'))
            entry = {
                'data_type': data_types[i],
                'num_elems': num_elems[i],
                'rec_vary': rec_varys[i],
                'dim_varys': dim_varys[i],
                'num_dims': num_dims[i],
                'dim_sizes': first_dim,
                'rec_num': rec_nums[i],
                'var_num': var_nums[i],
                'var_name': padded_name.rstrip(),
            }
            self.z_variable_info[entry['var_name']] = entry

            # The by-number lookup keeps the unstripped name.
            self.z_variable_names_by_num[entry['var_num']] = padded_name
173

174
    def load_all_variables(self):
        """Loads all variables from CDF.

        Note this routine is called automatically
        upon instantiation.

        The loaded arrays are placed in `self.data` and
        `self.data_loaded` is set to True once every type was processed.

        """

        self.data = {}

        # Need to add r variable names.
        # Collect variable information for each, organize it neatly for
        # fortran call.
        dim_sizes, rec_nums, data_types, names = [], [], [], []
        for name, var_info in self.z_variable_info.items():
            dim_sizes.extend(var_info['dim_sizes'])
            rec_nums.append(var_info['rec_num'])
            data_types.append(var_info['data_type'])
            names.append(name.ljust(256))
        dim_sizes = np.array(dim_sizes)
        rec_nums = np.array(rec_nums)
        data_types = np.array(data_types)

        # (type key, name of the fortran_cdf loader, extra keyword args).
        # The order fixes the insertion order of `self.data`.
        load_plan = (
            ('real4', 'get_multi_z_real4', {}),
            ('float', 'get_multi_z_real4', {}),
            ('real8', 'get_multi_z_real8', {}),
            ('double', 'get_multi_z_real8', {}),
            ('int4', 'get_multi_z_int4', {}),
            ('uint4', 'get_multi_z_int4', {'data_offset': 2 ** 32}),
            ('int2', 'get_multi_z_int2', {}),
            ('uint2', 'get_multi_z_int2', {'data_offset': 2 ** 16}),
            ('int1', 'get_multi_z_int1', {}),
            ('uint1', 'get_multi_z_int1', {'data_offset': 2 ** 8}),
            ('byte', 'get_multi_z_int1', {}),
            ('epoch', 'get_multi_z_real8', {'epoch': True}),
            ('epoch16', 'get_multi_z_epoch16', {'epoch16': True}),
            ('TT2000', 'get_multi_z_tt2000', {'epoch': True}),
        )

        # Individually load all variables by each data type.
        for type_key, loader_name, options in load_plan:
            # epoch16 is requested with doubled dimension sizes.
            sizes = 2 * dim_sizes if type_key == 'epoch16' else dim_sizes
            self._call_multi_fortran_z(names, data_types, rec_nums, sizes,
                                       self.cdf_data_types[type_key],
                                       getattr(fortran_cdf, loader_name),
                                       **options)

        # Mark data has been loaded.
        self.data_loaded = True

        return
256

257
    def _call_multi_fortran_z(self, names, data_types, rec_nums,
3✔
258
                              dim_sizes, input_type_code, func,
259
                              epoch=False, data_offset=None, epoch16=False):
260
        """Calls fortran functions to load CDF variable data
261

262
        Parameters
263
        ----------
264
        names : list-like
265
            List of variables names.
266
        data_types : list-like
267
            List of all loaded data type codes as used by CDF.
268
        rec_nums : list-like
269
            List of record numbers in CDF file. Provided by variable_info.
270
        dim_sizes : list-like
271
            List of dimensions as provided by variable_info.
272
        input_type_code : int
273
            Specific type code to load.
274
        func : function
275
            Fortran function via python interface that will be used for
276
            actual loading.
277
        epoch : bool
278
            Flag indicating type is epoch. Translates things to datetime
279
            standard. (default=False)
280
        data_offset :
281
            Offset value to be applied to data. Required for unsigned
282
            integers in CDF. (default=None)
283
        epoch16 : bool
284
            Flag indicating type is epoch16. Translates things to datetime
285
            standard. (default=False)
286

287

288
        """
289

290
        # Isolate input type code variables from total supplied types.
291
        idx, = np.where(data_types == input_type_code)
3✔
292

293
        if len(idx) > 0:
3✔
294
            # Read all data of a given type at once.
295
            max_rec = rec_nums[idx].max()
3✔
296
            sub_names = np.array(names)[idx]
3✔
297
            sub_sizes = dim_sizes[idx]
3✔
298
            status, data = func(self.fname, sub_names.tolist(),
3✔
299
                                sub_sizes, sub_sizes.sum(), max_rec,
300
                                len(sub_names))
301
            if status == 0:
3✔
302
                # Account for quirks of CDF data storage for certain types.
303
                if data_offset is not None:
3✔
304
                    data = data.astype(int)
3✔
305
                    idx, idy, = np.where(data < 0)
3✔
306
                    data[idx, idy] += data_offset
3✔
307
                if epoch:
3✔
308
                    # Account for difference in seconds between
309
                    # CDF epoch and python's epoch, leap year in there
310
                    # (datetime(1971,1,2) -
311
                    #      datetime(1,1,1)).total_seconds()*1000
312
                    data -= 62167219200000
3✔
313
                    data = data.astype('<M8[ms]')
3✔
314
                if epoch16:
3✔
315
                    data[0::2, :] -= 62167219200
×
316
                    data = data[0::2, :] * 1E9 + data[1::2, :] / 1.E3
×
317
                    data = data.astype('datetime64[ns]')
×
318
                    sub_sizes /= 2
×
319

320
                # All data of a type has been loaded and tweaked as necessary.
321
                # Parse through returned array to break out the individual
322
                # variables as appropriate.
323
                self._process_return_multi_z(data, sub_names, sub_sizes)
3✔
324
            else:
325
                raise IOError(fortran_cdf.statusreporter(status))
×
326

327
        return
3✔
328

329
    def _process_return_multi_z(self, data, names, dim_sizes):
3✔
330
        """Process and attach data from various `fortran_cdf` 'get' functions.
331
        """
332

333
        d1 = 0
3✔
334
        d2 = 0
3✔
335
        for name, dim_size in zip(names, dim_sizes):
3✔
336
            d2 = d1 + dim_size
3✔
337
            if dim_size == 1:
3✔
338
                self.data[name.rstrip()] = data[d1, :]
3✔
339
            else:
340
                self.data[name.rstrip()] = data[d1:d2, :]
×
341
            d1 += dim_size
3✔
342

343
        return
3✔
344

345
    def _read_all_attribute_info(self):
        """Read all attribute properties, g, r, and z attributes

        Attributes whose maximum g, r and z entries are all zero are
        skipped. The rest are stored by name in `self.global_attrs_info`
        (scope 1) or `self.var_attrs_info` (scope 2).

        Raises
        ------
        IOError
            If the Fortran call reports a non-zero status.

        """

        attr_count = copy.deepcopy(self._num_attrs)
        fname = copy.deepcopy(self.fname)
        out = fortran_cdf.inquire_all_attr(fname, attr_count, len(fname))
        status = out[0]
        names = out[1].astype('U')
        scopes, max_gentries, max_rentries = out[2], out[3], out[4]
        max_zentries, attr_nums = out[5], out[6]

        if status != 0:
            raise IOError(fortran_cdf.statusreporter(status))

        global_attrs_info = {}
        var_attrs_info = {}
        for (raw_name, scope, gentry, rentry, zentry,
             attr_num) in zip(names, scopes, max_gentries, max_rentries,
                              max_zentries, attr_nums):
            key = ''.join(raw_name).rstrip()
            record = {'scope': scope,
                      'max_gentry': gentry,
                      'max_rentry': rentry,
                      'max_zentry': zentry,
                      'attr_num': attr_num}

            # Skip attributes with no entries of any kind.
            all_zero = (gentry == 0) & (rentry == 0) & (zentry == 0)
            if all_zero:
                continue

            if scope == 1:
                global_attrs_info[key] = record
            elif scope == 2:
                var_attrs_info[key] = record

        self.global_attrs_info = global_attrs_info
        self.var_attrs_info = var_attrs_info

        return
387

388
    def _read_all_z_attribute_data(self):
        """Read all CDF z-attribute data

        Queries type and size information for every entry of every
        variable attribute in `self.var_attrs_info`, then loads the
        attribute values one data type at a time into `self.meta`.

        Raises
        ------
        IOError
            If the Fortran inquiry reports a non-zero status.

        """
        self.meta = {}

        # Collect attribute info needed to get more info from
        # fortran routines.
        attr_info = self.var_attrs_info
        names = attr_info.keys()
        num_z_attrs = len(names)
        max_entries = np.array([attr_info[key]['max_zentry']
                                for key in names])
        attr_nums = np.array([attr_info[key]['attr_num'] for key in names])

        info = fortran_cdf.z_attr_all_inquire(self.fname, attr_nums,
                                              num_z_attrs, max_entries,
                                              self._num_z_vars, len(self.fname))
        status, data_types, num_elems, entry_nums = (info[0], info[1],
                                                     info[2], info[3])

        if status != 0:
            raise IOError(fortran_cdf.statusreporter(status))

        # Record the per-attribute results, and repeat each attribute's
        # number and name once per entry so they line up with the
        # flattened arrays built below.
        exp_attr_nums = []
        attr_names = []
        for i, name in enumerate(names):
            details = attr_info[name]
            details['data_type'] = data_types[i]
            details['num_elems'] = num_elems[i]
            details['entry_num'] = entry_nums[i]
            repeats = len(entry_nums[i])
            exp_attr_nums.extend([details['attr_num']] * repeats)
            attr_names.extend([name] * repeats)

        # All the info is now packed up.
        # Need to break it out to make it easier to load via fortran.
        # Includes:
        # attribute  id, entry id (zVariable ID), data_type, num_elems
        data_types = data_types.flatten()
        num_elems = num_elems.flatten()
        entry_nums = entry_nums.flatten()
        attr_nums = np.array(exp_attr_nums)

        # Drop everything that isn't valid
        keep, = np.where(entry_nums > 0)

        data_types = data_types[keep]
        num_elems = num_elems[keep]
        entry_nums = entry_nums[keep]
        attr_nums = attr_nums[keep]
        attr_names = np.array(attr_names)[keep]

        # Grab corresponding variable name for each attribute
        var_names = [self.z_variable_names_by_num[entry].rstrip()
                     for entry in entry_nums]

        # (type key, name of the fortran_cdf loader, unsigned offset).
        # The order fixes the insertion order of `self.meta`.
        load_plan = (
            ('real4', 'get_multi_z_attr_real4', None),
            ('float', 'get_multi_z_attr_real4', None),
            ('real8', 'get_multi_z_attr_real8', None),
            ('double', 'get_multi_z_attr_real8', None),
            ('byte', 'get_multi_z_attr_int1', None),
            ('int1', 'get_multi_z_attr_int1', None),
            ('uint1', 'get_multi_z_attr_int1', 256),
            ('int2', 'get_multi_z_attr_int2', None),
            ('uint2', 'get_multi_z_attr_int2', 65536),
            ('int4', 'get_multi_z_attr_int4', None),
            ('uint4', 'get_multi_z_attr_int4', 2 ** 32),
            ('char', 'get_multi_z_attr_char', None),
            ('uchar', 'get_multi_z_attr_char', None),
        )

        # Get data back, shorten to num_elems, add to structure.
        for type_key, loader_name, offset in load_plan:
            self._call_multi_fortran_z_attr(attr_names, data_types, num_elems,
                                            entry_nums, attr_nums, var_names,
                                            self.cdf_data_types[type_key],
                                            getattr(fortran_cdf, loader_name),
                                            data_offset=offset)
        return
510

511
    def _call_multi_fortran_z_attr(self, names, data_types, num_elems,
3✔
512
                                   entry_nums, attr_nums, var_names,
513
                                   input_type_code, func, data_offset=None):
514
        """Calls Fortran function that reads attribute data.
515

516
        data_offset translates unsigned into signed.
517
        If number read in is negative, offset added.
518
        """
519
        # Isolate input type code variables.
520
        idx, = np.where(data_types == input_type_code)
3✔
521

522
        if len(idx) > 0:
3✔
523
            # Maximum array dimension.
524
            max_num = num_elems[idx].max()
3✔
525
            sub_num_elems = num_elems[idx]
3✔
526
            sub_names = np.array(names)[idx]
3✔
527
            sub_var_names = np.array(var_names)[idx]
3✔
528

529
            # zVariable numbers, 'entry' number.
530
            sub_entry_nums = entry_nums[idx]
3✔
531

532
            # Attribute number.
533
            sub_attr_nums = attr_nums[idx]
3✔
534
            status, data = func(self.fname, sub_attr_nums, sub_entry_nums,
3✔
535
                                len(sub_attr_nums), max_num, len(self.fname))
536
            if (status == 0).all():
3✔
537
                if data_offset is not None:
3✔
538
                    data = data.astype(int)
3✔
539
                    idx, idy, = np.where(data < 0)
3✔
540
                    data[idx, idy] += data_offset
3✔
541
                self._process_return_multi_z_attr(data, sub_names,
3✔
542
                                                  sub_var_names, sub_num_elems)
543
            else:
544
                # Raise the first error.
545
                idx, = np.where(status != 0)
×
546
                raise IOError(fortran_cdf.statusreporter(status[idx][0]))
×
547
        return
3✔
548

549
    def _process_return_multi_z_attr(self, data, attr_names, var_names,
3✔
550
                                     sub_num_elems):
551
        '''process and attach data from fortran_cdf.get_multi_*'''
552

553
        for i, (attr_name, var_name, num_e) in enumerate(zip(attr_names,
3✔
554
                                                             var_names,
555
                                                             sub_num_elems)):
556
            if var_name not in self.meta.keys():
3✔
557
                self.meta[var_name] = {}
3✔
558
            if num_e == 1:
3✔
559
                self.meta[var_name][attr_name] = data[i, 0]
3✔
560
            else:
561
                if data[i].dtype == '|S1':
3✔
562
                    chars = []
3✔
563
                    for d in data[i, :num_e]:
3✔
564
                        try:
3✔
565
                            chars.append(d.astype('U'))
3✔
566
                        except UnicodeDecodeError:
×
567
                            # Uninterpretable character was encountered.
568
                            # Fill inserted.
569
                            chars.append('*')
×
570
                    self.meta[var_name][attr_name] = ''.join(chars).rstrip()
3✔
571
                else:
572
                    self.meta[var_name][attr_name] = data[i, 0:num_e]
×
573

574
    def to_pysat(self, flatten_twod=True, units_label='UNITS',
                 name_label='LONG_NAME', fill_label='FILLVAL',
                 plot_label='FIELDNAM', min_label='VALIDMIN',
                 max_label='VALIDMAX', notes_label='VAR_NOTES',
                 desc_label='CATDESC', axis_label='LABLAXIS'):
        """Export loaded CDF data into data, meta for pysat module.

        Parameters
        ----------
        flatten_twod : bool (True)
            If True, then two dimensional data is flattened across
            columns. Name mangling is used to group data, first column
            is 'name', last column is 'name_end'. In between numbers are
            appended 'name_0', 'name_1', etc. All data for a given 2D array
            may be accessed via, data.loc[:, 'item':'item_end']
            If False, then 2D data is stored as a series of DataFrames,
            indexed by Epoch. data['item'].iloc[0]
        units_label : str
            Identifier within metadata for units. Defaults to CDAWeb standard.
            (default='UNITS')
        name_label : str
            Identifier within metadata for variable name, not normally present
            within CDAWeb files. If not, will use values from the variable name
            in the file. (default='LONG_NAME')
        fill_label : str
            Identifier within metadata for Fill Values. Defaults to CDAWeb
            standard. (default='FILLVAL')
        plot_label : str
            Identifier within metadata for variable name used when plotting.
            Defaults to CDAWeb standard. (default='FIELDNAM')
        min_label : str
            Identifier within metadata for minimum variable value.
            Defaults to CDAWeb standard. (default='VALIDMIN')
        max_label : str
            Identifier within metadata for maximum variable value.
            Defaults to CDAWeb standard. (default='VALIDMAX')
        notes_label : str
            Identifier within metadata for notes. Defaults to CDAWeb standard.
            (default='VAR_NOTES')
        desc_label : str
            Identifier within metadata for a variable description.
            Defaults to CDAWeb standard. (default='CATDESC')
        axis_label : str
            Identifier within metadata for axis name used when plotting.
            Defaults to CDAWeb standard. (default='LABLAXIS')

        Returns
        -------
        data : pandas.DataFrame
            Data suitable for attachment to a pysat.Instrument object.
        meta : pysat.Meta
            pysat Metadata class suitable for attachment to a pysat.Instrument
            object.

        Raises
        ------
        ValueError
            If the file holds no variable named 'Epoch' (in any letter
            case), since the output is indexed by that variable.

        Note
        ----
        The *_labels should be set to the values in the file, if present.
        Note that once the meta object returned from this function is attached
        to a pysat.Instrument object then the *_labels on the Instrument
        are assigned to the newly attached Meta object.

        The pysat Meta object will use data with labels that match the patterns
        in *_labels even if the case does not match.

        """

        # Shallow copy, so renaming and removing keys below leaves
        # `self.data` untouched.
        cdata = self.data.copy()

        # Create a dictionary of the labels for use in initializing
        # the Metadata.
        labels = {'units': (units_label, str), 'name': (name_label, str),
                  'notes': (notes_label, str), 'desc': (desc_label, str),
                  'plot': (plot_label, str), 'axis': (axis_label, str),
                  'scale': ('scale', str), 'min_val': (min_label, float),
                  'max_val': (max_label, float),
                  'fill_val': (fill_label, float)}

        # Create pysat.Meta object using data above
        # and utilize the attribute labels provided by the user.
        meta = pysat.Meta(pandas.DataFrame.from_dict(self.meta,
                                                     orient='index'),
                          labels=labels)

        # Account for different possible cases for Epoch, epoch, EPOCH, epOch.
        epoch = None
        for true_name in list(meta.keys()):
            if true_name.lower() == 'epoch':
                meta.data.rename(index={true_name: 'Epoch'}, inplace=True)
                epoch = cdata.pop(true_name)
                cdata['Epoch'] = epoch

        if epoch is None:
            # The time variable may carry no metadata at all, so look for
            # it in the data itself before giving up.
            for true_name in list(cdata.keys()):
                if true_name.lower() == 'epoch':
                    epoch = cdata.pop(true_name)
                    cdata['Epoch'] = epoch

        if epoch is None:
            raise ValueError("No 'Epoch' variable (any letter case) found "
                             "in the CDF data; unable to build a time "
                             "index.")

        # Ready to format data, iterate over all of the data names
        # and put into a pandas DataFrame.
        two_d_data = []
        drop_list = []
        for name in cdata.keys():
            temp = np.shape(cdata[name])

            # Treat 2 dimensional data differently.
            if len(temp) == 2:
                if not flatten_twod:
                    # Put 2D data into a Frame at each time,
                    # remove data from dict when adding to the DataFrame.
                    # The array is transposed before flattening so that
                    # each run of `step` values belongs to one record,
                    # matching the transpose used by the flattened branch.
                    frame = pandas.DataFrame(cdata[name].T.flatten(),
                                             columns=[name])
                    drop_list.append(name)

                    step = temp[0]
                    new_list = []
                    new_index = np.arange(step)
                    for i in np.arange(len(epoch)):
                        new_list.append(frame.iloc[i * step:(i + 1) * step, :])
                        new_list[-1].index = new_index

                    new_frame = pandas.Series(new_list, index=epoch, name=name)
                    two_d_data.append(new_frame)

                else:
                    # Flatten 2D into series of 1D columns named
                    # name, name_0, name_1, ..., name_end.
                    new_names = [name + '_{i}'.format(i=i)
                                 for i in np.arange(temp[0] - 2)]
                    new_names.append(name + '_end')
                    new_names.insert(0, name)

                    # Remove data from dict when adding to the DataFrame.
                    drop_list.append(name)
                    frame = pandas.DataFrame(cdata[name].T,
                                             index=epoch,
                                             columns=new_names)
                    two_d_data.append(frame)

        for name in drop_list:
            cdata.pop(name)

        # All of the data left over is 1D, add as Series
        data = pandas.DataFrame(cdata, index=epoch)
        two_d_data.append(data)
        data = pandas.concat(two_d_data, axis=1)

        # Epoch is already the index, so its column is dropped.
        data.drop('Epoch', axis=1, inplace=True)
        return data, meta
716

717

718
class chameleon(object):
    """Provides multiple access mechanisms for larger CDF object.

    Supports spacepy access pattern along with pysatCDF native
    data access pattern.

    Attributes
    ----------
    fname : str
        CDF filename, used when printing.
    name : str
        CDF variable name, used when printing.
    data
        Values returned by indexing.
    attrs
        Attribute metadata supplied for the variable.
    info : dict
        Variable information, listed one key per line when printing.

    """

    def __init__(self, fname, name, data, attr, info):
        self.fname, self.name = fname, name
        self.data = data
        self.attrs = attr
        self.info = info

    def __getitem__(self, key):
        """Return all data for ``[...]``, otherwise ``self.data[key]``."""
        if key is not Ellipsis:
            return self.data[key]
        return self.data

    def __repr__(self):
        """Return the filename, variable name and one info entry per line."""
        pieces = ['CDF filename : ' + self.fname + '\n',
                  'CDF variable name: ' + self.name + '\n']
        pieces.extend(key + " : " + str(value) + '\n'
                      for key, value in self.info.items())

        return ''.join(pieces)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc