15073834064

Committed 16 May 2025 05:16PM UTC coverage: 49.414% (+2.6%) from 46.79%

Build # 15073834064

Build Type

Pull #750

github

Specific Base c98309

Committed by

web-flow

Commit Message

Merge 8e475a77c into e481532ae

Pull Request Pull Request #750: Online plots

Run Details

538 of 720 new or added lines in 3 files covered. (74.72%)

1000 existing lines in 20 files now uncovered.

4677 of 9465 relevant lines covered (49.41%)

0.49 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.51

/iblrig/raw_data_loaders.py

import json
import logging
import re
from pathlib import Path
from typing import Any

import numpy as np
import pandas as pd
from pandas.core.dtypes.concat import union_categoricals

log = logging.getLogger(__name__)
RE_PATTERN_EVENT = re.compile(r'^(?P<Channel>\D+\d?)_?(?P<Value>.*)$')


def load_task_jsonable(jsonable_file: str | Path, offset: int = 0) -> tuple[pd.DataFrame, list[Any]]:
    """
    Reads in a task data jsonable file and returns a trials dataframe and a bpod data list.

    Parameters
    ----------
    jsonable_file : str or Path
        full path to jsonable file.
    offset : int, optional
        The offset to start reading from. Defaults to 0.

    Returns
    -------
    tuple
        A tuple containing

        *  trials_table : pandas.DataFrame
              A DataFrame with the trial info in the same format as the Session trials table.
        *  bpod_data : list
              timing data for each trial
    """
    with open(jsonable_file) as f:
        f.seek(offset, 0)
        trials_table = [json.loads(line) for line in f]

    # pop out bpod data
    bpod_data = [td.pop('behavior_data') for td in trials_table]

    return pd.DataFrame(trials_table), bpod_data


def bpod_session_data_to_dataframe(bpod_data: list[dict[str, Any]], existing_data: pd.DataFrame | None = None) -> pd.DataFrame:
    """
    Convert Bpod session data into a single Pandas DataFrame.

    Parameters
    ----------
    bpod_data : list of dict
        A list of dictionaries as returned by load_task_jsonable, where each dictionary contains data for a single trial.
    existing_data : pd.DataFrame, optional
        Existing dataframe that the incoming data will be appended to. If it is non-empty and has a 'Trial' column,
        the new trials are numbered continuing from its last row. ``None`` or an empty dataframe is ignored and the
        numbering starts at zero.

    Returns
    -------
    pd.DataFrame
        A Pandas DataFrame containing event data from the specified trials, with the following columns:

        *  Time : datetime.timedelta
              timestamp of the event (datetime.timedelta)
        *  Type : str (categorical)
              type of the event (TrialStart, StateStart, InputEvent, etc.)
        *  Trial : int
              index of the trial, zero-based
        *  State : str (categorical)
              name of the state
        *  Event : str (categorical)
              name of the event (only for type InputEvent)
        *  Channel : str (categorical)
              name of the event's channel (only for a subset of InputEvents)
        *  Value : int
              value of the event (only for a subset of InputEvents)
    """
    # an empty dataframe has no last row to continue from, treat it like no existing data at all
    has_existing = existing_data is not None and len(existing_data) > 0

    # define index of the first new trial
    first_trial = 0
    if has_existing and 'Trial' in existing_data:
        # convert to a Python int before adding: the column may hold a fixed-width integer
        # that could overflow when incremented
        first_trial = int(existing_data['Trial'].iloc[-1]) + 1

    # loop over requested trials
    dataframes = [existing_data] if has_existing else []
    for trial, trial_data in enumerate(bpod_data, start=first_trial):
        dataframes.append(bpod_trial_data_to_dataframe(trial_data, trial))

    return concat_bpod_dataframes(dataframes)


def concat_bpod_dataframes(dataframes: list[pd.DataFrame]) -> pd.DataFrame:
    """
    Concatenate a list of DataFrames containing Bpod trial data into a single DataFrame.

    Before concatenation, the categorical columns 'Type', 'State', 'Event' and 'Channel' of every DataFrame are given
    one shared set of categories (the union across all DataFrames). This is done by assigning to the columns of the
    DataFrames passed in, i.e. the input DataFrames are modified.

    Parameters
    ----------
    dataframes : list of DataFrames
        The DataFrames to concatenate. Each one needs categorical columns 'Type', 'State', 'Event' and 'Channel'.

    Returns
    -------
    pd.DataFrame
        A Pandas DataFrame containing the rows of all input DataFrames, with the following columns:

        *  Time : datetime.timedelta
              timestamp of the event (datetime.timedelta)
        *  Type : str (categorical)
              type of the event (TrialStart, StateStart, InputEvent, etc.)
        *  Trial : int
              index of the trial, zero-based
        *  State : str (categorical)
              name of the state
        *  Event : str (categorical)
              name of the event (only for type InputEvent)
        *  Channel : str (categorical)
              name of the event's channel (only for a subset of InputEvents)
        *  Value : int
              value of the event (only for a subset of InputEvents)
    """
    categorical_columns = ('Type', 'State', 'Event', 'Channel')

    # first pass: collect the union of categories for each column across all frames
    shared_categories = {
        column: union_categoricals([frame[column] for frame in dataframes]).categories for column in categorical_columns
    }

    # second pass: align each frame's categorical columns to the shared categories
    for frame in dataframes:
        for column in categorical_columns:
            frame[column] = frame[column].cat.set_categories(shared_categories[column])

    return pd.concat(dataframes)


def bpod_trial_data_to_dataframes(
    bpod_trial_data: list[dict[str, Any]], existing_data: list[pd.DataFrame] | None = None
) -> list[pd.DataFrame]:
    """
    Convert a list of Bpod trial data dictionaries into a list of Pandas DataFrames.

    Each DataFrame corresponds to a single trial's data, as returned by `bpod_trial_data_to_dataframe`. If existing DataFrames are
    provided, the new DataFrames will be appended to this list.

    Parameters
    ----------
    bpod_trial_data : list of dict
        A list of dictionaries, where each dictionary contains data for a single trial.
    existing_data : list of pd.DataFrame, optional
        An optional list of existing DataFrames to which the new DataFrames will be appended. If None, a new list will be created.

    Returns
    -------
    list of pd.DataFrame
        A list of Pandas DataFrames, each containing event data from the corresponding trial.
    """
    dataframes = existing_data if existing_data is not None else list()
    trial_number = len(dataframes)
    for single_trial_data in bpod_trial_data:
        dataframes.append(bpod_trial_data_to_dataframe(bpod_trial_data=single_trial_data, trial=trial_number))
        trial_number += 1
    return dataframes


def bpod_trial_data_to_dataframe(bpod_trial_data: dict[str, Any], trial: int) -> pd.DataFrame:
    """
    Convert a single Bpod trial's data into a Pandas DataFrame.

    The trial's state and event timestamps are flattened into one row per occurrence (TrialStart, StateStart,
    InputEvent, StateEnd, TrialEnd), sorted, and indexed by time.

    Parameters
    ----------
    bpod_trial_data : dict
        A dictionary containing data for a single trial. The keys 'Trial start timestamp', 'Trial end timestamp',
        'States timestamps' and 'Events timestamps' are read; the latter two must be dictionaries mapping a
        state name to a list of (start, end) pairs and an event name to a list of timestamps, respectively.
    trial : int
        An integer representing the trial index.

    Returns
    -------
    pd.DataFrame
        A Pandas DataFrame containing event data from the specified trial, indexed by 'Time' and with the columns
        'Type', 'State', 'Trial', 'Event', 'Channel' and 'Value' (in this order):

        *  Time : timedelta64[us] (index)
              timestamp of the event: the trial's start timestamp plus the event's timestamp within the trial
        *  Type : str (categorical)
              type of the event (TrialStart, StateStart, InputEvent, StateEnd, TrialEnd)
        *  State : str (categorical)
              name of the state, forward-filled from the most recent row that carries a state
        *  Trial : int
              index of the trial as passed in `trial`, down-cast to the smallest unsigned integer type
        *  Event : str (categorical)
              name of the event (only for type InputEvent)
        *  Channel : str (categorical)
              name of the event's channel as extracted by RE_PATTERN_EVENT (only for type InputEvent)
        *  Value : int (nullable)
              value of the event; 'Low'/'Out' map to 0 and 'High'/'In' to 1, anything non-numeric becomes NA
    """
    trial_start = bpod_trial_data['Trial start timestamp']
    trial_end = bpod_trial_data['Trial end timestamp']

    state_times = bpod_trial_data['States timestamps'].items()
    event_times = bpod_trial_data['Events timestamps'].items()

    # convert bpod data to list of tuples: (time, type, state, event)
    # times are relative to the trial start; states with a NaN timestamp are left out
    event_list = [(0, 'TrialStart', pd.NA, pd.NA)]
    event_list += [(t, 'StateStart', state, pd.NA) for state, times in state_times for t, _ in times if not np.isnan(t)]
    event_list += [(t, 'InputEvent', pd.NA, event) for event, times in event_times for t in times]
    event_list += [(t, 'StateEnd', state, pd.NA) for state, times in state_times for _, t in times if not np.isnan(t)]
    event_list += [(trial_end - trial_start, 'TrialEnd', pd.NA, pd.NA)]
    # tuples sort by time first; entries with equal time are ordered by the type string, then state / event name
    # NOTE(review): as a consequence a 'StateStart' at time 0 sorts before 'TrialStart' - confirm this is intended
    event_list = sorted(event_list)

    # create dataframe with TimedeltaIndex
    df = pd.DataFrame(data=event_list, columns=['Time', 'Type', 'State', 'Event'])
    # shift times by the trial start and scale by 1e6 for use as microseconds (i.e. inputs are treated as seconds)
    # NOTE(review): the float to timedelta64 cast presumably truncates instead of rounding - confirm
    df.Time = np.array((df.Time + trial_start) * 1e6, dtype='timedelta64[us]')
    df.set_index('Time', inplace=True)

    # cast types
    df['Type'] = df['Type'].astype('category')
    # forward-fill so that rows without their own state carry the state of the preceding row
    df['State'] = df['State'].astype('category').ffill()
    df['Event'] = df['Event'].astype('category')
    # insert the trial index as third column, stored as the smallest unsigned integer type that fits
    df.insert(2, 'Trial', pd.to_numeric([trial], downcast='unsigned')[0])

    # extract channel name and value from Event strings
    # since 'Event' is categorical, only process its unique values for performance
    mappings = df['Event'].cat.categories.to_series().str.extract(RE_PATTERN_EVENT, expand=True)
    mappings['Channel'] = mappings['Channel'].astype('category')
    # translate textual levels to digits so that they survive the numeric conversion below
    mappings['Value'] = mappings['Value'].replace({'Low': '0', 'High': '1', 'Out': '0', 'In': '1'})
    # values that are not numeric are coerced to NA; the nullable backend keeps the column integer-typed
    mappings['Value'] = pd.to_numeric(mappings['Value'], errors='coerce', downcast='unsigned', dtype_backend='numpy_nullable')

    # map the extracted channel and value information back to the DataFrame.
    df['Channel'] = df['Event'].map(mappings['Channel'])
    df['Value'] = df['Event'].map(mappings['Value'])

    return df

1	import json	1✔
2	import logging	1✔
3	import re	1✔
4	from pathlib import Path	1✔
5	from typing import Any	1✔
6
7	import numpy as np	1✔
8	import pandas as pd	1✔
9	from pandas.core.dtypes.concat import union_categoricals	1✔
10
11	log = logging.getLogger(__name__)	1✔
12	RE_PATTERN_EVENT = re.compile(r'^(?P<Channel>\D+\d?)_?(?P<Value>.*)$')	1✔
13
14
15	def load_task_jsonable(jsonable_file: str \| Path, offset: int = 0) -> tuple[pd.DataFrame, list[Any]]:	1✔
16	"""
17	Reads in a task data jsonable file and returns a trials dataframe and a bpod data list.
18
19	Parameters
20	----------
21	jsonable_file : str or Path
22	full path to jsonable file.
23	offset : int, optional
24	The offset to start reading from. Defaults to 0.
25
26	Returns
27	-------
28	tuple
29	A tuple containing
30
31	* trials_table : pandas.DataFrame
32	A DataFrame with the trial info in the same format as the Session trials table.
33	* bpod_data : list
34	timing data for each trial
35	"""
36	with open(jsonable_file) as f:	1✔
37	f.seek(offset, 0)	1✔
38	trials_table = [json.loads(line) for line in f]	1✔
39
40	# pop out bpod data
41	bpod_data = [td.pop('behavior_data') for td in trials_table]	1✔
42
43	return pd.DataFrame(trials_table), bpod_data	1✔
44
45
46	def bpod_session_data_to_dataframe(bpod_data: list[dict[str, Any]], existing_data: pd.DataFrame \| None = None) -> pd.DataFrame:	1✔
47	"""
48	Convert Bpod session data into a single Pandas DataFrame.
49
50	Parameters
51	----------
52	bpod_data : list of dict
53	A list of dictionaries as returned by load_task_jsonable, where each dictionary contains data for a single trial.
54	existing_data : pd.DataFrame
55	Existing dataframe that the incoming data will be appended to.
56
57	Returns
58	-------
59	pd.DataFrame
60	A Pandas DataFrame containing event data from the specified trials, with the following columns:
61
62	* Time : datetime.timedelta
63	timestamp of the event (datetime.timedelta)
64	* Type : str (categorical)
65	type of the event (TrialStart, StateStart, InputEvent, etc.)
66	* Trial : int
67	index of the trial, zero-based
68	* State : str (categorical)
69	name of the state
70	* Event : str (categorical)
71	name of the event (only for type InputEvent)
72	* Channel : str (categorical)
73	name of the event's channel (only for a subset of InputEvents)
74	* Value : int
75	value of the event (only for a subset of InputEvents)
76	"""
77	# define trial index
78	trials = np.arange(len(bpod_data))	1✔
79	if existing_data is not None and 'Trial' in existing_data:	1✔
NEW 80	trials += existing_data.iloc[-1].Trial + 1	×
81
82	# loop over requested trials
83	dataframes = [] if existing_data is None or len(existing_data) == 0 else [existing_data]	1✔
84	for index, trial in enumerate(trials):	1✔
85	dataframes.append(bpod_trial_data_to_dataframe(bpod_data[index], trial))	1✔
86
87	return concat_bpod_dataframes(dataframes)	1✔
88
89
90	def concat_bpod_dataframes(dataframes: list[pd.DataFrame]) -> pd.DataFrame:	1✔
91	"""
92	Concatenate a list of DataFrames containing Bpod trial data into a single DataFrame.
93
94	Parameters
95	----------
96	dataframes : list of DataFrames
97	A list of dictionaries as returned by load_task_jsonable, where each dictionary contains data for a single trial.
98
99	Returns
100	-------
101	pd.DataFrame
102	A Pandas DataFrame containing event data from the specified trials, with the following columns:
103
104	* Time : datetime.timedelta
105	timestamp of the event (datetime.timedelta)
106	* Type : str (categorical)
107	type of the event (TrialStart, StateStart, InputEvent, etc.)
108	* Trial : int
109	index of the trial, zero-based
110	* State : str (categorical)
111	name of the state
112	* Event : str (categorical)
113	name of the event (only for type InputEvent)
114	* Channel : str (categorical)
115	name of the event's channel (only for a subset of InputEvents)
116	* Value : int
117	value of the event (only for a subset of InputEvents)
118	"""
119	categories_type = union_categoricals([df['Type'] for df in dataframes])	1✔
120	categories_state = union_categoricals([df['State'] for df in dataframes])	1✔
121	categories_event = union_categoricals([df['Event'] for df in dataframes])	1✔
122	categories_channel = union_categoricals([df['Channel'] for df in dataframes])	1✔
123	for df in dataframes:	1✔
124	df['Type'] = df['Type'].cat.set_categories(categories_type.categories)	1✔
125	df['State'] = df['State'].cat.set_categories(categories_state.categories)	1✔
126	df['Event'] = df['Event'].cat.set_categories(categories_event.categories)	1✔
127	df['Channel'] = df['Channel'].cat.set_categories(categories_channel.categories)	1✔
128	return pd.concat(dataframes)	1✔
129
130
131	def bpod_trial_data_to_dataframes(	1✔
132	bpod_trial_data: list[dict[str, Any]], existing_data: list[pd.DataFrame] \| None = None
133	) -> list[pd.DataFrame]:
134	"""
135	Convert a list of Bpod trial data dictionaries into a list of Pandas DataFrames.
136
137	Each DataFrame corresponds to a single trial's data, as returned by `bpod_trial_data_to_dataframe`. If existing DataFrames are
138	provided, the new DataFrames will be appended to this list.
139
140	Parameters
141	----------
142	bpod_trial_data : list of dict
143	A list of dictionaries, where each dictionary contains data for a single trial.
144	existing_data : list of pd.DataFrame, optional
145	An optional list of existing DataFrames to which the new DataFrames will be appended. If None, a new list will be created.
146
147	Returns
148	-------
149	list of pd.DataFrame
150	A list of Pandas DataFrames, each containing event data from the corresponding trial.
151	"""
152	dataframes = existing_data if existing_data is not None else list()	1✔
153	trial_number = len(dataframes)	1✔
154	for single_trial_data in bpod_trial_data:	1✔
155	dataframes.append(bpod_trial_data_to_dataframe(bpod_trial_data=single_trial_data, trial=trial_number))	1✔
156	trial_number += 1	1✔
157	return dataframes	1✔
158
159
160	def bpod_trial_data_to_dataframe(bpod_trial_data: dict[str, Any], trial: int) -> pd.DataFrame:	1✔
161	"""
162	Convert a single Bpod trial's data into a Pandas DataFrame.
163
164	Parameters
165	----------
166	bpod_trial_data : dict
167	A dictionary containing data for a single trial, including timestamps and events.
168	trial : int
169	An integer representing the trial index.
170
171	Returns
172	-------
173	pd.DataFrame
174	A Pandas DataFrame containing event data from the specified trial, with the following columns:
175
176	* Time : datetime.timedelta
177	timestamp of the event (datetime.timedelta)
178	* Type : str (categorical)
179	type of the event (TrialStart, StateStart, InputEvent, etc.)
180	* Trial : int
181	index of the trial, zero-based
182	* State : str (categorical)
183	name of the state
184	* Event : str (categorical)
185	name of the event (only for type InputEvent)
186	* Channel : str (categorical)
187	name of the event's channel (only for a subset of InputEvents)
188	* Value : int
189	value of the event (only for a subset of InputEvents)
190	"""
191	trial_start = bpod_trial_data['Trial start timestamp']	1✔
192	trial_end = bpod_trial_data['Trial end timestamp']	1✔
193
194	state_times = bpod_trial_data['States timestamps'].items()	1✔
195	event_times = bpod_trial_data['Events timestamps'].items()	1✔
196
197	# convert bpod data to list of tuples
198	event_list = [(0, 'TrialStart', pd.NA, pd.NA)]	1✔
199	event_list += [(t, 'StateStart', state, pd.NA) for state, times in state_times for t, _ in times if not np.isnan(t)]	1✔
200	event_list += [(t, 'InputEvent', pd.NA, event) for event, times in event_times for t in times]	1✔
201	event_list += [(t, 'StateEnd', state, pd.NA) for state, times in state_times for _, t in times if not np.isnan(t)]	1✔
202	event_list += [(trial_end - trial_start, 'TrialEnd', pd.NA, pd.NA)]	1✔
203	event_list = sorted(event_list)	1✔
204
205	# create dataframe with TimedeltaIndex
206	df = pd.DataFrame(data=event_list, columns=['Time', 'Type', 'State', 'Event'])	1✔
207	df.Time = np.array((df.Time + trial_start) * 1e6, dtype='timedelta64[us]')	1✔
208	df.set_index('Time', inplace=True)	1✔
209
210	# cast types
211	df['Type'] = df['Type'].astype('category')	1✔
212	df['State'] = df['State'].astype('category').ffill()	1✔
213	df['Event'] = df['Event'].astype('category')	1✔
214	df.insert(2, 'Trial', pd.to_numeric([trial], downcast='unsigned')[0])	1✔
215
216	# extract channel name and value from Event strings
217	# since 'Event' is categorical, only process its unique values for performance
218	mappings = df['Event'].cat.categories.to_series().str.extract(RE_PATTERN_EVENT, expand=True)	1✔
219	mappings['Channel'] = mappings['Channel'].astype('category')	1✔
220	mappings['Value'] = mappings['Value'].replace({'Low': '0', 'High': '1', 'Out': '0', 'In': '1'})	1✔
221	mappings['Value'] = pd.to_numeric(mappings['Value'], errors='coerce', downcast='unsigned', dtype_backend='numpy_nullable')	1✔
222
223	# map the extracted channel and value information back to the DataFrame.
224	df['Channel'] = df['Event'].map(mappings['Channel'])	1✔
225	df['Value'] = df['Event'].map(mappings['Value'])	1✔
226
227	return df	1✔

int-brain-lab / iblrig / 15073834064

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous