• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

int-brain-lab / ibllib / 7961675356254463

pending completion
7961675356254463

Pull #557

continuous-integration/UCL

olivier
add test
Pull Request #557: Chained protocols

718 of 718 new or added lines in 27 files covered. (100.0%)

12554 of 18072 relevant lines covered (69.47%)

0.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.81
/ibllib/io/raw_data_loaders.py
1
#!/usr/bin/env python
2
# -*- coding:utf-8 -*-
3
# @Author: Niccolò Bonacchi, Miles Wells
4
# @Date: Monday, July 16th 2018, 1:28:46 pm
5
"""
1✔
6
Raw Data Loader functions for PyBpod rig
7

8
Module contains one loader function per raw datafile
9
"""
10
import json
1✔
11
import logging
1✔
12
import wave
1✔
13
from collections import OrderedDict
1✔
14
from datetime import datetime
1✔
15
from pathlib import Path, PureWindowsPath
1✔
16
from typing import Union
1✔
17

18
from dateutil import parser as dateparser
1✔
19
from pkg_resources import parse_version
1✔
20
import numpy as np
1✔
21
import pandas as pd
1✔
22

23
from iblutil.io import jsonable
1✔
24
from ibllib.io.video import assert_valid_label
1✔
25
from ibllib.time import uncycle_pgts, convert_pgts, date2isostr
1✔
26

27
_logger = logging.getLogger(__name__)
1✔
28

29

30
def trial_times_to_times(raw_trial):
    """
    Convert every timestamp of a single trial to absolute session time.

    Each event/state time is offset by (trial start - Bpod start) so that all
    values become float64 seconds counted from the session start. The three
    trial-level timestamps are then re-referenced so Bpod start sits at zero.

    Bpod timestamps are in microseconds (µs), PyBpod timestamps in seconds (s).

    :param raw_trial: raw trial data
    :type raw_trial: dict
    :return: the same trial dict with timestamps made absolute
    :rtype: dict
    """
    behavior = raw_trial['behavior_data']
    offset = behavior['Trial start timestamp'] - behavior['Bpod start timestamp']

    # Shift every event timestamp into absolute session time
    behavior['Events timestamps'] = {
        name: [t + offset for t in times]
        for name, times in behavior['Events timestamps'].items()
    }
    # State timestamps come as lists of [enter, exit] pairs
    behavior['States timestamps'] = {
        name: [[t + offset for t in pair] for pair in intervals]
        for name, intervals in behavior['States timestamps'].items()
    }

    # Re-reference the trial-level timestamps so Bpod start is zero
    start_shift = behavior['Bpod start timestamp']
    for key in ('Bpod start timestamp', 'Trial start timestamp', 'Trial end timestamp'):
        behavior[key] -= start_shift
    assert behavior['Bpod start timestamp'] == 0
    return raw_trial
69

70

71
def load_bpod(session_path, task_collection='raw_behavior_data'):
    """
    Load both settings and data from bpod (.json and .jsonable)

    :param session_path: Absolute path of session folder
    :param task_collection: Collection within session path with behavior data
    :return: dict settings and list of dicts data
    """
    settings = load_settings(session_path, task_collection)
    data = load_data(session_path, task_collection)
    return settings, data
80

81

82
def load_data(session_path: Union[str, Path], task_collection='raw_behavior_data', time='absolute'):
    """
    Load PyBpod data files (.jsonable).

    Bpod timestamps are in microseconds (µs), PyBpod timestamps in seconds (s).

    :param session_path: Absolute path of session folder
    :type session_path: str, Path
    :param task_collection: Collection within session path with behavior data
    :param time: if 'absolute' (default), trial timestamps are converted to
     seconds from session start via trial_times_to_times
    :return: A list of len ntrials each trial being a dictionary
    :rtype: list of dicts
    """
    if session_path is None:
        _logger.warning('No data loaded: session_path is None')
        return
    data_file = next(
        Path(session_path).joinpath(task_collection).glob('_iblrig_taskData.raw*.jsonable'), None)
    if not data_file:
        _logger.warning('No data loaded: could not find raw data file')
        return None
    trials = jsonable.read(data_file)
    if time == 'absolute':
        trials = list(map(trial_times_to_times, trials))
    return trials
106

107

108
def load_camera_frameData(session_path, camera: str = 'left', raw: bool = False) -> pd.DataFrame:
    """Loads binary frame data from Bonsai camera recording workflow.

    Args:
        session_path (StrPath): Path to session folder
        camera (str, optional): Load FrameData for a specific camera. Defaults to 'left'.
        raw (bool, optional): Whether to return raw or parsed data. Defaults to False.

    Returns:
        pandas.DataFrame with columns
        (Timestamp, embeddedTimeStamp, embeddedFrameCounter, embeddedGPIOPinState):

        parsed (raw=False, default):
            Timestamp             float64 seconds from first frame
            embeddedTimeStamp     float64 seconds from first frame
            embeddedFrameCounter  int64 frames from first frame
            embeddedGPIOPinState  list of 4-element boolean numpy arrays,
                                  e.g. np.array([True, False, False, False])
        raw:
            Timestamp             UTC ticks from BehaviorPC (100s of ns since 1/1/0001)
            embeddedTimeStamp     camera ticks (needs uncycling and conversion)
            embeddedFrameCounter  raw frame counter (int)
            embeddedGPIOPinState  integer encoding of the 4 GPIO pins
    """
    camera = assert_valid_label(camera)
    bin_file = next(Path(session_path).joinpath("raw_video_data")
                    .glob(f"_iblrig_{camera}Camera.frameData*.bin"), None)
    assert bin_file, f"{bin_file}\nFile not Found: Could not find bin file for cam <{camera}>"

    flat = np.fromfile(bin_file, dtype=np.float64)
    assert flat.size % 4 == 0, "Dimension mismatch: bin file length is not mod 4"
    frame_data = flat.astype(np.int64).reshape(-1, 4)

    columns = ["Timestamp", "embeddedTimeStamp", "embeddedFrameCounter", "embeddedGPIOPinState"]
    if raw:
        return pd.DataFrame(frame_data, columns=columns)

    parsed = dict.fromkeys(columns)
    # UTC ticks are 100 ns each -> divide by 1e7 for seconds, relative to first frame
    parsed["Timestamp"] = (frame_data[:, 0] - frame_data[0, 0]) / 10_000_000
    cam_ts = uncycle_pgts(convert_pgts(frame_data[:, 1]))
    parsed["embeddedTimeStamp"] = cam_ts - cam_ts[0]  # seconds from first frame
    parsed["embeddedFrameCounter"] = frame_data[:, 2] - frame_data[0, 2]  # count from zero
    # Expand the 4 GPIO bits (stored in the top nibble of a uint32) into booleans
    pin_matrix = (np.right_shift(np.tile(frame_data[:, 3], (4, 1)).T,
                                 np.arange(31, 27, -1)) & 0x1) == 1
    parsed["embeddedGPIOPinState"] = [np.array(row) for row in pin_matrix.tolist()]

    return pd.DataFrame.from_dict(parsed)
159

160

161
def load_camera_ssv_times(session_path, camera: str):
    """
    Load the bonsai frame and camera timestamps from Camera.timestamps.ssv

    NB: For some sessions the frame times are in the first column, in others the order is reversed.
    NB: If using the new bin file the bonsai_times is a float in seconds since first frame
    :param session_path: Absolute path of session folder
    :param camera: Name of the camera to load, e.g. 'left'
    :return: array of datetimes, array of frame times in seconds
    """
    camera = assert_valid_label(camera)
    video_path = Path(session_path).joinpath('raw_video_data')
    # Prefer the newer binary frame-data file when present
    if next(video_path.glob(f'_iblrig_{camera}Camera.frameData*.bin'), None):
        frames = load_camera_frameData(session_path, camera=camera)
        return frames['Timestamp'].values, frames['embeddedTimeStamp'].values

    ssv_file = next(video_path.glob(f'_iblrig_{camera.lower()}Camera.timestamps*.ssv'), None)
    if ssv_file is None:
        expected = str(video_path.joinpath(f'_iblrig_{camera.lower()}Camera.timestamps.ssv'))
        raise FileNotFoundError(expected + ' not found')

    # Column order varies between sessions: peek at the first line and check
    # whether the second field parses as an integer (the camera tick counter).
    with open(ssv_file, 'r') as fid:
        first_line = fid.readline()
    type_map = OrderedDict(bonsai='<M8[ns]', camera='<u4')
    try:
        int(first_line.split(' ')[1])
    except ValueError:
        type_map.move_to_end('bonsai')
    # np.genfromtxt is faster than np.loadtxt here
    ssv_times = np.genfromtxt(ssv_file, names=type_map.keys(),
                              dtype=','.join(type_map.values()), delimiter=' ')
    return ssv_times['bonsai'], uncycle_pgts(convert_pgts(ssv_times['camera']))
200

201

202
def load_embedded_frame_data(session_path, label: str, raw=False):
    """
    Load the embedded frame count and GPIO for a given session.  Missing or
    empty files yield None values.

    :param session_path: Absolute path of session folder
    :param label: The specific video to load, one of ('left', 'right', 'body')
    :param raw: If True return the data unprocessed; otherwise the frame count
     starts at 0 and the GPIO is returned as dicts of indices/polarities
    :return: The frame count, GPIO
    """
    return (load_camera_frame_count(session_path, label, raw=raw),
            load_camera_gpio(session_path, label, as_dicts=not raw))
215

216

217
def load_camera_frame_count(session_path, label: str, raw=True):
    """
    Load the embedded frame count for a given session.  If the file doesn't
    exist, or is empty, None is returned.

    :param session_path: Absolute path of session folder
    :param label: The specific video to load, one of ('left', 'right', 'body')
    :param raw: If True return counts as stored; otherwise re-reference to start at 0
    :return: The frame count array, or None
    """
    if session_path is None:
        return

    label = assert_valid_label(label)
    video_path = Path(session_path).joinpath('raw_video_data')
    # The newer Bonsai workflow stores the counter inside the frameData bin file
    if next(video_path.glob(f'_iblrig_{label}Camera.frameData*.bin'), None):
        frames = load_camera_frameData(session_path, camera=label)
        return frames['embeddedFrameCounter'].values

    # Fall back to the stand-alone frame counter file
    count_file = next(video_path.glob(f'_iblrig_{label}Camera.frame_counter*.bin'), None)
    count = np.fromfile(count_file, dtype=np.float64).astype(int) if count_file else []
    if len(count) == 0:
        return
    if not raw:
        count -= count[0]  # start from zero
    return count
245

246

247
def load_camera_gpio(session_path, label: str, as_dicts=False):
    """
    Load the GPIO for a given session.  Missing or empty files yield None.

    The raw binary file contains uint32 values (saved as doubles) whose top 4
    bits encode the state of the 4 GPIO pins.  Each value is expanded to 4
    booleans by shifting each pin's bit down to the least-significant position.

    :param session_path: Absolute path of session folder
    :param label: The specific video to load, one of ('left', 'right', 'body')
    :param as_dicts: If False return a boolean array of shape (n_frames, n_pins),
     otherwise a list of dicts with keys ('indices', 'polarities'), one per pin.
    :return: An nx4 boolean array, or a list of per-pin dicts (None entries where
     a pin never changed state), or None / [None] * 4 when no data are found.
    """
    if session_path is None:
        return
    raw_path = Path(session_path).joinpath('raw_video_data')
    label = assert_valid_label(label)

    if next(raw_path.glob(f'_iblrig_{label}Camera.frameData*.bin'), False):
        # New workflow: pin states already parsed out of the frameData bin file
        frames = load_camera_frameData(session_path, camera=label, raw=False)
        gpio = np.array(list(frames['embeddedGPIOPinState'].values))
        if len(gpio) == 0:
            return [None] * 4 if as_dicts else None
    else:
        gpio_file = next(raw_path.glob(f'_iblrig_{label}Camera.GPIO*.bin'), None)
        # Missing and empty files are handled identically below
        gpio = np.fromfile(gpio_file, dtype=np.float64).astype(np.uint32) if gpio_file else []
        # 4 pins -> only 16 legal bit patterns in the top nibble
        if not np.isin(gpio, np.left_shift(np.arange(2 ** 4, dtype=np.uint32), 32 - 4)).all():
            _logger.warning('Unexpected GPIO values; decoding may fail')
        if len(gpio) == 0:
            return [None] * 4 if as_dicts else None
        # Shift each pin's bit down to position 0 and test whether it is set
        gpio = (np.right_shift(np.tile(gpio, (4, 1)).T, np.arange(31, 27, -1)) & 0x1) == 1

    if as_dicts:
        if not gpio.any():
            _logger.error('No GPIO changes')
            return [None] * 4
        # Per-pin state changes; the initial state counts as an edge at frame 0
        edges = np.vstack((gpio[0, :], np.diff(gpio.astype(int), axis=0)))
        gpio = [{'indices': np.where(edges[:, pin])[0],
                 'polarities': edges[edges[:, pin] != 0, pin]}
                for pin in range(4)]
        # Pins that never changed state become None
        gpio = [None if d['indices'].size == 0 else d for d in gpio]

    return gpio
303

304

305
def _read_settings_json_compatibility_enforced(settings):
    """
    Patch iblrig settings for compatibility across rig versions.

    Parameters
    ----------
    settings : pathlib.Path, dict
        Either a _iblrig_taskSettings.raw.json file path or the loaded settings.

    Returns
    -------
    dict
        The task settings patched for compatibility.
    """
    if isinstance(settings, dict):
        md = settings.copy()  # never mutate the caller's dict
    else:
        with open(settings) as js:
            md = json.load(js)
    if 'IS_MOCK' not in md.keys():
        md['IS_MOCK'] = False
    # Untagged versions cannot be patched any further; bail out early
    if not md.get('IBLRIG_VERSION_TAG'):
        _logger.warning("You appear to be on an untagged version...")
        return md
    # NB: past the early return above, IBLRIG_VERSION_TAG is guaranteed present
    # and non-empty, so the old dead `'IBLRIG_VERSION_TAG' not in md` branch and
    # the `or '3.2.3'` fallback were removed.
    # 2018-12-05 Version 3.2.3 fixes (permanent fixes in IBL_RIG from 3.2.4 on)
    if parse_version(md['IBLRIG_VERSION_TAG']) <= parse_version('3.2.3'):
        if 'LAST_TRIAL_DATA' in md.keys():
            md.pop('LAST_TRIAL_DATA')
        if 'weighings' in md['PYBPOD_SUBJECT_EXTRA'].keys():
            md['PYBPOD_SUBJECT_EXTRA'].pop('weighings')
        if 'water_administration' in md['PYBPOD_SUBJECT_EXTRA'].keys():
            md['PYBPOD_SUBJECT_EXTRA'].pop('water_administration')
        if 'IBLRIG_COMMIT_HASH' not in md.keys():
            md['IBLRIG_COMMIT_HASH'] = 'f9d8905647dbafe1f9bdf78f73b286197ae2647b'
        #  parse the date format to Django supported ISO
        dt = dateparser.parse(md['SESSION_DATETIME'])
        md['SESSION_DATETIME'] = date2isostr(dt)
        # add the weight key if it doesn't already exist
        if 'SUBJECT_WEIGHT' not in md.keys():
            md['SUBJECT_WEIGHT'] = None
    return md
348

349

350
def load_settings(session_path: Union[str, Path], task_collection='raw_behavior_data'):
    """
    Load PyBpod Settings files (.json), patched for rig-version compatibility.

    :param session_path: Absolute path of session folder
    :type session_path: str, Path
    :param task_collection: Collection within session path with behavior data
    :return: Settings dictionary, or None when the file cannot be found
    :rtype: dict
    """
    if session_path is None:
        _logger.warning("No data loaded: session_path is None")
        return
    settings_file = next(
        Path(session_path).joinpath(task_collection).glob("_iblrig_taskSettings.raw*.json"), None)
    if not settings_file:
        _logger.warning("No data loaded: could not find raw settings file")
        return None
    return _read_settings_json_compatibility_enforced(settings_file)
371

372

373
def load_stim_position_screen(session_path, task_collection='raw_behavior_data'):
    """
    Load the on-screen stimulus position log (.csv).

    :param session_path: Absolute path of session folder
    :param task_collection: Collection within session path with behavior data
    :return: DataFrame with columns ['contrast', 'position', 'bns_ts'] where
     bns_ts is parsed to datetime, or None when the file cannot be found
    """
    if session_path is None:
        _logger.warning('No data loaded: session_path is None')
        return
    path = Path(session_path).joinpath(task_collection)
    path = next(path.glob("_iblrig_stimPositionScreen.raw*.csv"), None)
    # Previously a missing file fell through to pd.read_csv(None) and raised;
    # return None with a warning like the sibling loaders instead
    if not path:
        _logger.warning('No data loaded: could not find raw stimPositionScreen file')
        return None
    data = pd.read_csv(path, sep=',', header=None, on_bad_lines='skip')
    data.columns = ['contrast', 'position', 'bns_ts']
    data['bns_ts'] = pd.to_datetime(data['bns_ts'])
    return data
381

382

383
def load_encoder_events(session_path, task_collection='raw_behavior_data', settings=False):
    """
    Load Rotary Encoder (RE) events raw data file.

    Assumes that a folder called "raw_behavior_data" exists in folder.

    Events number correspond to following bpod states:
    1: correct / hide_stim
    2: stim_on
    3: closed_loop
    4: freeze_error / freeze_correct

    >>> data.columns
    >>> ['re_ts',   # Rotary Encoder Timestamp (ms) 'numpy.int64'
         'sm_ev',   # State Machine Event           'numpy.int64'
         'bns_ts']  # Bonsai Timestamp (int)        'pandas.Timestamp'
        # pd.to_datetime(data.bns_ts) to work in datetimes

    :param session_path: Absolute path of session folder
    :param task_collection: Collection within session path with behavior data
    :param settings: Optional pre-loaded settings dict; loaded from disk when falsy
    :return: dataframe w/ 3 cols and (ntrials * 3) lines, or None if no file found
    :rtype: Pandas.DataFrame
    """
    if session_path is None:
        return
    path = Path(session_path).joinpath(task_collection)
    path = next(path.glob("_iblrig_encoderEvents.raw*.ssv"), None)
    # Check for a missing file BEFORE the version sniffing below: the original
    # guard came after open(path) and raised TypeError when path was None
    if not path:
        return None
    if not settings:
        settings = load_settings(session_path, task_collection=task_collection)
    if settings is None or settings['IBLRIG_VERSION_TAG'] == '':
        settings = {'IBLRIG_VERSION_TAG': '100.0.0'}
        # auto-detect old files when version is not labeled
        with open(path) as fid:
            line = fid.readline()
        if line.startswith('Event') and 'StateMachine' in line:
            settings = {'IBLRIG_VERSION_TAG': '0.0.0'}
    if parse_version(settings['IBLRIG_VERSION_TAG']) >= parse_version('5.0.0'):
        return _load_encoder_events_file_ge5(path)
    else:
        return _load_encoder_events_file_lt5(path)
425

426

427
def _load_encoder_ssv_file(file_path, **kwargs):
1✔
428
    file_path = Path(file_path)
1✔
429
    if file_path.stat().st_size == 0:
1✔
430
        _logger.error(f"{file_path.name} is an empty file. ")
×
431
        raise ValueError(f"{file_path.name} is an empty file. ABORT EXTRACTION. ")
×
432
    return pd.read_csv(file_path, sep=' ', header=None, on_bad_lines='skip', **kwargs)
1✔
433

434

435
def _load_encoder_positions_file_lt5(file_path):
    """
    Load and groom a pre-v5 encoderPositions file, without the session overhead.

    :param file_path: path to an _iblrig_encoderPositions.raw.ssv file
    :return: dataframe of encoder positions
    """
    wanted = ['re_ts', 're_pos', 'bns_ts']
    raw = _load_encoder_ssv_file(file_path,
                                 names=['_', 're_ts', 're_pos', 'bns_ts', '__'],
                                 usecols=wanted)
    return _groom_wheel_data_lt5(raw, label='_iblrig_encoderPositions.raw.ssv', path=file_path)
445

446

447
def _load_encoder_positions_file_ge5(file_path):
    """
    Load and groom a v5+ encoderPositions file, without the session overhead.

    :param file_path: path to an _iblrig_encoderPositions.raw.ssv file
    :return: dataframe of encoder positions
    """
    wanted = ['re_ts', 're_pos']
    raw = _load_encoder_ssv_file(file_path,
                                 names=['re_ts', 're_pos', '_'],
                                 usecols=wanted)
    return _groom_wheel_data_ge5(raw, label='_iblrig_encoderPositions.raw.ssv', path=file_path)
457

458

459
def _load_encoder_events_file_lt5(file_path):
    """
    Load and groom a pre-v5 encoderEvents file, without the session overhead.

    :param file_path: path to an _iblrig_encoderEvents.raw.ssv file
    :return: dataframe of encoder events
    """
    wanted = ['re_ts', 'sm_ev', 'bns_ts']
    raw = _load_encoder_ssv_file(file_path,
                                 names=['_', 're_ts', '__', 'sm_ev', 'bns_ts', '___'],
                                 usecols=wanted)
    return _groom_wheel_data_lt5(raw, label='_iblrig_encoderEvents.raw.ssv', path=file_path)
469

470

471
def _load_encoder_events_file_ge5(file_path):
    """
    Load and groom a v5+ encoderEvents file, without the session overhead.

    :param file_path: path to an _iblrig_encoderEvents.raw.ssv file
    :return: dataframe of encoder events
    """
    wanted = ['re_ts', 'sm_ev']
    raw = _load_encoder_ssv_file(file_path,
                                 names=['re_ts', 'sm_ev', '_'],
                                 usecols=wanted)
    return _groom_wheel_data_ge5(raw, label='_iblrig_encoderEvents.raw.ssv', path=file_path)
481

482

483
def load_encoder_positions(session_path, task_collection='raw_behavior_data', settings=False):
    """
    Load Rotary Encoder (RE) positions from raw data file within a session path.

    Assumes that a folder called "raw_behavior_data" exists in folder.
    Positions are RE ticks [-512, 512] == [-180º, 180º]
    0 == trial stim init position
    Positive nums are rightwards movements (mouse) or RE CW (mouse)

    Variable line number, depends on movements.

    Raw datafile Columns:
        Position, RE timestamp, RE Position, Bonsai Timestamp

    Position is always equal to 'Position' so this column was dropped.

    >>> data.columns
    >>> ['re_ts',   # Rotary Encoder Timestamp (ms)     'numpy.int64'
         're_pos',  # Rotary Encoder position (ticks)   'numpy.int64'
         'bns_ts']  # Bonsai Timestamp                  'pandas.Timestamp'
        # pd.to_datetime(data.bns_ts) to work in datetimes

    :param session_path: Absolute path of session folder
    :type session_path: str
    :param task_collection: Collection within session path with behavior data
    :param settings: Optional pre-loaded settings dict; loaded from disk when falsy
    :return: dataframe w/ 3 cols and N positions, or None if no file found
    :rtype: Pandas.DataFrame
    """
    if session_path is None:
        return
    path = Path(session_path).joinpath(task_collection)
    path = next(path.glob("_iblrig_encoderPositions.raw*.ssv"), None)
    # Check for a missing file BEFORE the version sniffing below: the original
    # guard came after open(path) and raised TypeError when path was None
    if not path:
        _logger.warning("No data loaded: could not find raw encoderPositions file")
        return None
    if not settings:
        settings = load_settings(session_path, task_collection=task_collection)
    if settings is None or settings['IBLRIG_VERSION_TAG'] == '':
        settings = {'IBLRIG_VERSION_TAG': '100.0.0'}
        # auto-detect old files when version is not labeled
        with open(path) as fid:
            line = fid.readline()
        if line.startswith('Position'):
            settings = {'IBLRIG_VERSION_TAG': '0.0.0'}
    if parse_version(settings['IBLRIG_VERSION_TAG']) >= parse_version('5.0.0'):
        return _load_encoder_positions_file_ge5(path)
    else:
        return _load_encoder_positions_file_lt5(path)
530

531

532
def load_encoder_trial_info(session_path, task_collection='raw_behavior_data'):
    """
    Load Rotary Encoder trial info from raw data file.

    Assumes that a folder called "raw_behavior_data" exists in folder.

    NOTE: the last trial may have no matching data, since trial info is sent on
    trial start while data are only saved on trial exit: max(trial_num) can be
    N+1 when N trials' worth of data were saved.

    >>> data.columns
    >>> ['trial_num',     # Trial Number                     'numpy.int64'
         'stim_pos_init', # Initial position of visual stim  'numpy.int64'
         'stim_contrast', # Contrast of visual stimulus      'numpy.float64'
         'stim_freq',     # Frequency of gabor patch         'numpy.float64'
         'stim_angle',    # Angle of Gabor 0 = Vertical      'numpy.float64'
         'stim_gain',     # Wheel gain (mm/º of stim)        'numpy.float64'
         'stim_sigma',    # Size of patch                    'numpy.float64'
         'stim_phase',    # Phase of gabor                   'numpy.float64'
         'bns_ts' ]       # Bonsai Timestamp                 'pandas.Timestamp'
        # pd.to_datetime(data.bns_ts) to work in datetimes

    :param session_path: Absolute path of session folder
    :type session_path: str
    :return: dataframe w/ 9 cols and ntrials lines
    :rtype: Pandas.DataFrame
    """
    if session_path is None:
        return
    info_file = next(Path(session_path).joinpath(task_collection)
                     .glob("_iblrig_encoderTrialInfo.raw*.ssv"), None)
    if not info_file:
        return None
    table = pd.read_csv(info_file, sep=' ', header=None)
    # The trailing separator produces an empty 10th column; discard it
    table = table.drop([9], axis=1)
    table.columns = ['trial_num', 'stim_pos_init', 'stim_contrast', 'stim_freq',
                     'stim_angle', 'stim_gain', 'stim_sigma', 'stim_phase', 'bns_ts']
    return table
573

574

575
def load_ambient_sensor(session_path, task_collection='raw_behavior_data'):
    """
    Load Ambient Sensor data from session.

    Returns one dict per trial with keys
    ('Temperature_C', 'AirPressure_mb', 'RelativeHumidity').

    :param session_path: Absolute path of session folder
    :type session_path: str
    :return: list of dicts, or None when the file cannot be found
    :rtype: list
    """
    if session_path is None:
        return
    sensor_file = next(Path(session_path).joinpath(task_collection)
                       .glob("_iblrig_ambientSensorData.raw*.jsonable"), None)
    if not sensor_file:
        return None
    # One JSON record per line
    with open(sensor_file, 'r') as fid:
        return [json.loads(record) for record in fid]
602

603

604
def load_mic(session_path, task_collection='raw_behavior_data'):
    """
    Load Microphone wav file to np.array of len nSamples

    :param session_path: Absolute path of session folder
    :type session_path: str
    :param task_collection: Collection within session path with behavior data
    :return: An (nSamples, nChannels) int16 array of the sound waveform, or None
    :rtype: numpy.array
    """
    if session_path is None:
        return
    path = Path(session_path).joinpath(task_collection)
    path = next(path.glob("_iblrig_micData.raw*.wav"), None)
    if not path:
        return None
    # wave.open only accepts str paths or file objects, not pathlib.Path, so
    # convert explicitly; the context manager also closes the handle, which the
    # previous implementation leaked
    with wave.open(str(path)) as fp:
        nchan = fp.getnchannels()
        frames = fp.readframes(fp.getnframes())
    data = np.frombuffer(frames, np.int16)
    return np.reshape(data, (-1, nchan))
626

627

628
def _clean_wheel_dataframe(data: pd.DataFrame, label: str, path) -> pd.DataFrame:
    """
    Generic cleanup of a raw rotary-encoder dataframe (events or positions).

    Coerces columns to numeric, drops NaNs and duplicates, then repairs the
    're_ts' timestamp column in place: uint32 clock wraparounds are unwrapped,
    two swapped neighbouring samples are swapped back, a corrupt leading sample
    is dropped, and any remaining disorder is force-sorted with an error logged.

    :param data: raw dataframe; must contain a 're_ts' column
    :param label: file label used in log messages
    :param path: file path used in log messages
    :return: the cleaned dataframe
    """
    if np.any(data.isna()):
        _logger.warning(label + ' has missing/incomplete records \n %s', path)
    # first step is to re-interpret as numeric objects if not already done
    # (bns_ts is excluded: it holds Bonsai timestamp strings, not numbers)
    for col in data.columns:
        if data[col].dtype == object and col not in ['bns_ts']:
            data[col] = pd.to_numeric(data[col], errors='coerce')
    # then drop Nans and duplicates
    data.dropna(inplace=True)
    data.drop_duplicates(keep='first', inplace=True)
    data.reset_index(inplace=True)
    # handle the clock resets when microseconds exceed uint32 max value
    drop_first = False
    data['re_ts'] = data['re_ts'].astype(np.double, copy=False)
    if any(np.diff(data['re_ts']) < 0):
        # indices where the timestamp goes backwards; each repaired separately
        ind = np.where(np.diff(data['re_ts']) < 0)[0]
        for i in ind:
            # the first sample may be corrupt, in this case throw away
            if i <= 1:
                drop_first = i
                _logger.warning(label + ' rotary encoder positions timestamps'
                                        ' first sample corrupt ' + str(path))
            # if it's an uint32 wraparound, the diff should be close to 2 ** 32
            # (i.e. log2 of the backwards jump within 0.2 of 32); unwrap by
            # adding 2 ** 32 to every subsequent sample
            elif 32 - np.log2(data['re_ts'][i] - data['re_ts'][i + 1]) < 0.2:
                data.loc[i + 1:, 're_ts'] = data.loc[i + 1:, 're_ts'] + 2 ** 32
            # there is also the case where 2 positions are swapped and need to be swapped back

            elif data['re_ts'][i] > data['re_ts'][i + 1] > data['re_ts'][i - 1]:
                _logger.warning(label + ' rotary encoder timestamps swapped at index: ' +
                                str(i) + '  ' + str(path))
                a, b = data.iloc[i].copy(), data.iloc[i + 1].copy()
                data.iloc[i], data.iloc[i + 1] = b, a
            # if none of those 3 cases apply, raise an error
            else:
                _logger.error(label + ' Rotary encoder timestamps are not sorted.' + str(path))
                data.sort_values('re_ts', inplace=True)
                data.reset_index(inplace=True)
    if drop_first is not False:
        # remove the corrupt leading sample(s) detected above
        data.drop(data.loc[:drop_first].index, inplace=True)
        data = data.reindex()
    return data
669

670

671
def _groom_wheel_data_lt5(data, label='file ', path=''):
    """
    Account for variability and corruption in the wheel position files.
    There are many possible errors handled here and in the cleaning step,
    but nothing excludes getting new ones.
    """
    data = _clean_wheel_dataframe(data, label, path)
    # discard rows whose bonsai timestamp string does not have the expected 33 characters
    malformed = data.loc[data.bns_ts.apply(len) != 33].index
    data.drop(malformed, inplace=True)

    def _parse_bns(ts):
        # only the first 25 characters of the bonsai timestamp carry the datetime
        return datetime.strptime(ts[:25], '%Y-%m-%dT%H:%M:%S.%f')

    # estimate session duration from the first and last bonsai timestamps to
    # check whether the rotary encoder time scale is in ms instead of us
    sess_len_sec = (_parse_bns(data['bns_ts'].iloc[-1]) -
                    _parse_bns(data['bns_ts'].iloc[0])).seconds
    if data['re_ts'].iloc[-1] / (sess_len_sec + 1e-6) < 1e5:  # should be 1e6 normally
        _logger.warning('Rotary encoder reset logs events in ms instead of us: '
                        'RE firmware needs upgrading and wheel velocity is potentially inaccurate')
        data['re_ts'] = data['re_ts'] * 1000
    return data
687

688

689
def _groom_wheel_data_ge5(data, label='file ', path=''):
    """
    Account for variability and corruption in the wheel position files.
    There are many possible errors handled here and in the cleaning step,
    but nothing excludes getting new ones.
    """
    data = _clean_wheel_dataframe(data, label, path)
    # if the whole recording spans less than 20 s when read as microseconds,
    # the rotary encoder most likely logged in ms instead of us
    span = data['re_ts'].iloc[-1] - data['re_ts'].iloc[0]
    if span / 1e6 < 20:
        _logger.warning('Rotary encoder reset logs events in ms instead of us: '
                        'RE firmware needs upgrading and wheel velocity is potentially inaccurate')
        data['re_ts'] = data['re_ts'] * 1000
    return data
702

703

704
def save_bool(save, dataset_type):
    """
    Decide whether a given dataset type should be extracted/saved.

    :param save: either a bool applying to all dataset types, or a list of
        dataset type names (full name or file stem) to save
    :type save: bool or list
    :param dataset_type: the dataset type (e.g. a filename) being considered
    :type dataset_type: str
    :return: True if the dataset should be saved
    :rtype: bool
    :raises TypeError: if `save` is neither a bool nor a list
    """
    if isinstance(save, bool):
        out = save
    elif isinstance(save, list):
        # match either the full dataset name or its file stem
        out = (dataset_type in save) or (Path(dataset_type).stem in save)
    else:
        # previously `out` was left unbound here, raising a confusing NameError below
        raise TypeError(f'`save` must be a bool or a list, got {type(save).__name__}')
    if out:
        _logger.debug('extracting ' + dataset_type)
    return out
712

713

714
def sync_trials_robust(t0, t1, diff_threshold=0.001, drift_threshold_ppm=200, max_shift=5,
                       return_index=False):
    """
    Attempts to find matching timestamps in 2 time-series that have an offset, are drifting,
    and are most likely incomplete: sizes don't have to match, some pulses may be missing
    in any series.
    Only works with irregular time series as it relies on the derivative to match sync.

    :param t0: np.array of timestamps of the first time-series
    :param t1: np.array of timestamps of the second time-series
    :param diff_threshold: max difference between interval derivatives for a match (0.001)
    :param drift_threshold_ppm: max tolerated clock drift, in parts per million (200)
    :param max_shift: number of upcoming t1 events searched for each t0 interval (5)
    :param return_index: if True, also return the matched indices into t0 and t1 (False)
    :return: matched subsets of t0 and t1 (and their indices when return_index is True)
    """
    nsync = min(t0.size, t1.size)
    dt0 = np.diff(t0)
    dt1 = np.diff(t1)
    # NaN marks t0 intervals for which no t1 match has been found
    ind = np.zeros_like(dt0) * np.nan
    i0 = 0
    i1 = 0
    cdt = np.nan  # the current time difference between the two series to compute drift
    while i0 < (nsync - 1):
        # look in the next max_shift events the ones whose derivative match
        isearch = np.arange(i1, min(max_shift + i1, dt1.size))
        dec = np.abs(dt0[i0] - dt1[isearch]) < diff_threshold
        # another constraint is to check the dt for the maximum drift
        if ~np.isnan(cdt):
            drift_ppm = np.abs((cdt - (t0[i0] - t1[isearch])) / dt1[isearch]) * 1e6
            dec = np.logical_and(dec, drift_ppm <= drift_threshold_ppm)
        # if one is found
        if np.any(dec):
            ii1 = np.where(dec)[0][0]
            ind[i0] = i1 + ii1
            # advance the t1 search window past the matched event
            i1 += ii1 + 1
            cdt = t0[i0 + 1] - t1[i1 + ii1]
        i0 += 1
    it0 = np.where(~np.isnan(ind))[0]
    it1 = ind[it0].astype(int)
    # matched intervals are converted back to event indices: include both end points
    ind0 = np.unique(np.r_[it0, it0 + 1])
    ind1 = np.unique(np.r_[it1, it1 + 1])
    if return_index:
        return t0[ind0], t1[ind1], ind0, ind1
    else:
        return t0[ind0], t1[ind1]
759

760

761
def load_bpod_fronts(session_path: str, data: list = False, task_collection: str = 'raw_behavior_data') -> list:
    """load_bpod_fronts
    Loads BNC1 and BNC2 bpod channels times and polarities from session_path

    :param session_path: a valid session_path
    :type session_path: str
    :param data: pre-loaded raw data dict, defaults to False
    :type data: list, optional
    :param task_collection: the session subfolder holding the raw behavior data
    :type task_collection: str, optional
    :return: List of dicts BNC1 and BNC2 {"times": np.array, "polarities":np.array}
    :rtype: list
    """
    if not data:
        data = load_data(session_path, task_collection)

    def _channel_fronts(channel: str) -> dict:
        # Collect rising (+1) and falling (-1) fronts for one BNC channel over all trials,
        # then sort them by time (NaN placeholders for missing events sort to the end).
        fronts = np.array([[np.nan, np.nan]])
        for tr in data:
            events = tr["behavior_data"]["Events timestamps"]
            fronts = np.append(
                fronts,
                np.array([[x, 1] for x in events.get(f"{channel}High", [np.nan])]),
                axis=0,
            )
            fronts = np.append(
                fronts,
                np.array([[x, -1] for x in events.get(f"{channel}Low", [np.nan])]),
                axis=0,
            )
        fronts = fronts[1:, :]  # drop the NaN seed row
        fronts = fronts[fronts[:, 0].argsort()]
        return {"times": fronts[:, 0], "polarities": fronts[:, 1]}

    return [_channel_fronts("BNC1"), _channel_fronts("BNC2")]
828

829

830
def get_port_events(trial: dict, name: str = '') -> list:
    """get_port_events
    Return all event timestamps from bpod raw data trial that match 'name'
    --> looks in trial['behavior_data']['Events timestamps']

    :param trial: raw trial dict
    :type trial: dict
    :param name: name of event, defaults to ''
    :type name: str, optional
    :return: Sorted list of event timestamps
    :rtype: list
    TODO: add polarities?
    """
    timestamps: list = []
    # gather timestamps from every event whose name contains the requested substring
    for event_name, times in trial['behavior_data']['Events timestamps'].items():
        if name in event_name:
            timestamps.extend(times)
    return sorted(timestamps)
851

852

853
def load_widefield_mmap(session_path, dtype=np.uint16, shape=(540, 640), n_frames=None, mode='r'):
    """
    Memory-map the raw widefield data file of a session.

    Parameters
    ----------
    session_path : str, pathlib.Path
        A session path containing a 'raw_widefield_data' folder with a
        'widefield.raw.*.dat' file.
    dtype : numpy.dtype, type, str
        The data type of the raw samples (default: uint16).
    shape : tuple of int
        The pixel dimensions of a single frame (default: (540, 640)).
    n_frames : int, optional
        The number of frames; if None, inferred from the file size.
    mode : str
        The numpy.memmap access mode (default: read-only).

    Returns
    -------
    numpy.memmap or None
        A (n_frames, *shape) memory-mapped array, or None if no raw data file was found.
    """
    filepath = Path(session_path).joinpath('raw_widefield_data').glob('widefield.raw.*.dat')
    filepath = next(filepath, None)
    if not filepath:
        _logger.warning("No data loaded: could not find raw data file")
        return None

    # normalize to a dtype instance: np.dtype accepts strings, scalar types (e.g. the
    # np.uint16 default) and dtypes alike; previously `type(dtype) is str` left scalar
    # types unconverted, breaking the `dtype.itemsize` access below
    dtype = np.dtype(dtype)

    if n_frames is None:
        # Get the number of samples from the file size
        n_frames = int(filepath.stat().st_size / (np.prod(shape) * dtype.itemsize))

    return np.memmap(str(filepath), mode=mode, dtype=dtype, shape=(int(n_frames), *shape))
879

880

881
def patch_settings(session_path, collection='raw_behavior_data',
                   new_collection=None, subject=None, number=None, date=None):
    """Modify various details in a settings file.

    This function makes it easier to change things like subject name in a settings as it will
    modify the subject name in the myriad paths. NB: This saves the settings into the same location
    it was loaded from.

    Parameters
    ----------
    session_path : str, pathlib.Path
        The session path containing the settings file.
    collection : str
        The subfolder containing the settings file.
    new_collection : str
        An optional new subfolder to change in the settings paths.
    subject : str
        An optional new subject name to change in the settings.
    number : str, int
        An optional new number to change in the settings.
    date : str, datetime.date
        An optional date to change in the settings.

    Returns
    -------
    dict
        The modified settings.

    Raises
    ------
    IOError
        No settings file was found in `session_path`/`collection`.
    """
    settings = load_settings(session_path, collection)
    if not settings:
        raise IOError('Settings file not found')

    # The settings paths were written with Windows separators; resolve the on-disk
    # location of the settings file before mutating the dict.
    filename = PureWindowsPath(settings['SETTINGS_FILE_PATH']).name
    file_path = Path(session_path).joinpath(collection, filename)

    # NOTE: the patch order matters — the date patch below builds its search string
    # from the (possibly already patched) SUBJECT_NAME, and the number patch from
    # the (possibly already patched) SESSION_DATE.
    if subject:
        # Patch subject name
        old_subject = settings['SUBJECT_NAME']
        settings['SUBJECT_NAME'] = subject
        for k in settings.keys():
            if isinstance(settings[k], str):
                settings[k] = settings[k].replace(f'\\Subjects\\{old_subject}', f'\\Subjects\\{subject}')
        settings['SESSION_NAME'] = '\\'.join([subject, *settings['SESSION_NAME'].split('\\')[1:]])
        settings.pop('PYBPOD_SUBJECT_EXTRA')  # Get rid of Alyx subject info

    if date:
        # Patch session datetime
        date = str(date)
        old_date = settings['SESSION_DATE']
        settings['SESSION_DATE'] = date
        for k in settings.keys():
            if isinstance(settings[k], str):
                settings[k] = settings[k].replace(
                    f'\\{settings["SUBJECT_NAME"]}\\{old_date}',
                    f'\\{settings["SUBJECT_NAME"]}\\{date}'
                )
        # keep the time-of-day part, replace only the leading ISO date (10 chars)
        settings['SESSION_DATETIME'] = date + settings['SESSION_DATETIME'][10:]

    if number:
        # Patch session number
        old_number = settings['SESSION_NUMBER']
        if isinstance(number, int):
            # session numbers are zero-padded 3-digit strings, e.g. 1 -> '001'
            number = f'{number:03}'
        settings['SESSION_NUMBER'] = number
        for k in settings.keys():
            if isinstance(settings[k], str):
                settings[k] = settings[k].replace(
                    f'\\{settings["SESSION_DATE"]}\\{old_number}',
                    f'\\{settings["SESSION_DATE"]}\\{number}'
                )

    if new_collection:
        # Patch the raw data folder name in every path-valued setting
        old_path = settings['SESSION_RAW_DATA_FOLDER']
        new_path = PureWindowsPath(settings['SESSION_RAW_DATA_FOLDER']).with_name(new_collection)
        for k in settings.keys():
            if isinstance(settings[k], str):
                settings[k] = settings[k].replace(old_path, str(new_path))
    # write the patched settings back to the same file they were loaded from
    with open(file_path, 'w') as fp:
        json.dump(settings, fp, indent=' ')
    return settings
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc