8ac704ce-924d-4415-96d0-a7a53cd460d1

Build # 8ac704ce-924d-4415-96d0-a7a53cd460d1

Build Type

circle-ci

Committed by xzdandy

Commit Message

Merge branch 'obj-tracking' of github.com:georgia-tech-db/eva into obj-tracking

Pull Request Pull Request #566: feat: object tracking

Run Details

155 of 155 new or added lines in 16 files covered. (100.0%)

9371 of 9588 relevant lines covered (97.74%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.01

/eva/models/storage/batch.py

# coding=utf-8
# Copyright 2018-2022 EVA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Callable, Iterable, List, TypeVar, Union

import numpy as np
import pandas as pd

from eva.expression.abstract_expression import ExpressionType
from eva.parser.alias import Alias
from eva.utils.generic_utils import PickleSerializer
from eva.utils.logging_manager import logger

# Forward-reference placeholder: lets unquoted ``Batch`` annotations be
# evaluated before the class of the same name (defined below) exists.
# The class statement rebinds this name to the real class.
Batch = TypeVar("Batch")


class Batch:
    """
    Data model used for storing a batch of frames.
    Internally stored as a pandas DataFrame, conventionally with columns
    "id" and "data" (the class itself does not enforce a schema).
    id: integer index of frame
    data: frame as np.array

    Arguments:
        frames (DataFrame): pandas Dataframe holding frames data;
            an empty DataFrame is used when omitted.

    Raises:
        ValueError: if `frames` is given but is not a pandas DataFrame.
    """

    # __eq__ compares mutable frame contents, so batches are unhashable.
    # (Python would set this implicitly; it is spelled out for readers.)
    __hash__ = None

    def __init__(self, frames=None):
        if frames is None:
            frames = pd.DataFrame()
        elif not isinstance(frames, pd.DataFrame):
            raise ValueError(
                "Batch constructor not properly called.\n" "Expected pandas.DataFrame"
            )
        self._frames = frames

    @property
    def frames(self) -> pd.DataFrame:
        """The underlying DataFrame (not a copy)."""
        return self._frames

    def __len__(self):
        """Number of rows in the batch."""
        return len(self._frames)

    @property
    def columns(self):
        """Column labels of the underlying DataFrame as a list."""
        return list(self._frames.columns)

    def column_as_numpy_array(self, column_name: str) -> np.ndarray:
        """Return a column as numpy array

        Args:
            column_name (str): the name of the required column

        Returns:
            numpy.ndarray: the column data as a numpy array
        """
        return self._frames[column_name].to_numpy()

    def serialize(self):
        """Serialize the frames (plus the batch size) with PickleSerializer."""
        obj = {"frames": self._frames, "batch_size": len(self)}
        return PickleSerializer.serialize(obj)

    @classmethod
    def deserialize(cls, data):
        """Rebuild a batch from the output of `serialize`.

        Only the "frames" entry of the payload is used.
        """
        # NOTE(review): PickleSerializer is presumably pickle-based; if so it
        # must never be fed untrusted data -- confirm against its definition.
        obj = PickleSerializer.deserialize(data)
        return cls(frames=obj["frames"])

    # The comparison constructors below operate on the raw NumPy values, so
    # index and column labels of the inputs are ignored and the result carries
    # default integer labels.

    @classmethod
    def from_eq(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Elementwise `batch1 == batch2` as a boolean batch."""
        return cls(pd.DataFrame(batch1.to_numpy() == batch2.to_numpy()))

    @classmethod
    def from_greater(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Elementwise `batch1 > batch2` as a boolean batch."""
        return cls(pd.DataFrame(batch1.to_numpy() > batch2.to_numpy()))

    @classmethod
    def from_lesser(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Elementwise `batch1 < batch2` as a boolean batch."""
        return cls(pd.DataFrame(batch1.to_numpy() < batch2.to_numpy()))

    @classmethod
    def from_greater_eq(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Elementwise `batch1 >= batch2` as a boolean batch."""
        return cls(pd.DataFrame(batch1.to_numpy() >= batch2.to_numpy()))

    @classmethod
    def from_lesser_eq(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Elementwise `batch1 <= batch2` as a boolean batch."""
        return cls(pd.DataFrame(batch1.to_numpy() <= batch2.to_numpy()))

    @classmethod
    def from_not_eq(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Elementwise `batch1 != batch2` as a boolean batch."""
        return cls(pd.DataFrame(batch1.to_numpy() != batch2.to_numpy()))

    @classmethod
    def compare_contains(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Cell-wise test that each `batch1` cell contains every element of
        the corresponding `batch2` cell.

        Cells are paired positionally (row by row, then column by column).
        """
        return cls(
            pd.DataFrame(
                [
                    [all(x in p for x in q) for p, q in zip(left, right)]
                    for left, right in zip(batch1.to_numpy(), batch2.to_numpy())
                ]
            )
        )

    @classmethod
    def compare_is_contained(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Cell-wise test that every element of each `batch1` cell is present
        in the corresponding `batch2` cell.

        Cells are paired positionally (row by row, then column by column).
        """
        return cls(
            pd.DataFrame(
                [
                    [all(x in q for x in p) for p, q in zip(left, right)]
                    for left, right in zip(batch1.to_numpy(), batch2.to_numpy())
                ]
            )
        )

    @classmethod
    def compare_like(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Match the first column of `batch1` (cast to str) against a regex.

        The pattern is the first cell of `batch2`. It is read by position so
        that the lookup does not depend on `batch2`'s index labels.
        """
        col = batch1._frames.iloc[:, 0]
        regex = batch2._frames.iloc[0, 0]
        return cls(pd.DataFrame(col.astype("str").str.match(pat=regex)))

    def __str__(self) -> str:
        with pd.option_context(
            "display.pprint_nest_depth", 1, "display.max_colwidth", 100
        ):
            return f"{self._frames}"

    def __eq__(self, other: "Batch"):
        # this function does not work if a column is a nested numpy arrays
        # (eg, bboxes from yolo).
        if not isinstance(other, Batch):
            # Let Python fall back to its default handling instead of
            # failing with AttributeError on a foreign operand.
            return NotImplemented
        # Column order is irrelevant: both sides are compared in sorted order.
        return self._frames[sorted(self.columns)].equals(
            other.frames[sorted(other.columns)]
        )

    def __getitem__(self, indices) -> "Batch":
        """
        Returns a batch with the desired frames

        Arguments:
            indices (list, slice or int): a list is handed to
            DataFrame.iloc unchanged (row positions, or a boolean list
            of appropriate size with True for desired frames); a slice
            follows regular Python slicing rules, including negative
            bounds and steps; an int selects a single row.

        Raises:
            TypeError: for any other argument type.
        """
        if isinstance(indices, list):
            return self._get_frames_from_indices(indices)
        if isinstance(indices, slice):
            # slice.indices() resolves None / negative / out-of-range bounds
            # the way list slicing does, so batch[:0] is empty and batch[-2:]
            # yields the last two rows.
            positions = range(*indices.indices(len(self._frames)))
            return self._get_frames_from_indices(positions)
        if isinstance(indices, (int, np.integer)):
            return self._get_frames_from_indices([indices])
        raise TypeError("Invalid argument type: {}".format(type(indices)))

    def _get_frames_from_indices(self, required_frame_ids):
        """Select rows by position and wrap them in a new Batch."""
        return Batch(self._frames.iloc[required_frame_ids, :])

    def apply_function_expression(self, expr: Callable) -> "Batch":
        """
        Execute function expression on frames.
        `expr` receives the underlying DataFrame and must return a DataFrame.
        """
        return Batch(expr(self._frames))

    def iterrows(self):
        """Iterate over (index, row) pairs of the underlying DataFrame."""
        return self._frames.iterrows()

    def sort(self, by=None) -> None:
        """
        in_place sort; the index is reset afterwards.

        Args:
            by: column(s) to sort on; defaults to the first column.
        """
        if self.empty():
            return
        if by is None:
            by = self.columns[0]
        self._frames.sort_values(by=by, ignore_index=True, inplace=True)

    def sort_orderby(self, by, sort_type=None) -> None:
        """
        in_place sort for orderby

        Args:
            by: list of column names
            sort_type: list of True/False if ASC for each column name in 'by'
                i.e [True, False] means [ASC, DESC]. Defaults to ascending
                for every column in 'by'.
        """
        assert by is not None
        for column in by:
            assert (
                column in self._frames.columns
            ), "Can not orderby non-projected column: {}".format(column)

        if sort_type is None:
            # One flag per column: pandas rejects an `ascending` list whose
            # length differs from `by`.
            sort_type = [True] * len(by)

        self._frames.sort_values(
            by, ascending=sort_type, ignore_index=True, inplace=True
        )

    def invert(self) -> None:
        """Replace the frames with their elementwise `~` (in place)."""
        self._frames = ~self._frames

    @staticmethod
    def _single_truth_value(flags: pd.Series) -> bool:
        """Collapse a one-element Series of per-column results into a bool."""
        if len(flags) != 1:
            raise ValueError(
                "Expected a single-column batch, got {} columns".format(len(flags))
            )
        return bool(flags.iloc[0])

    def all_true(self) -> bool:
        """True if every value of a single-column batch is truthy.

        Raises:
            ValueError: if the batch does not have exactly one column.
        """
        return self._single_truth_value(self._frames.all())

    def all_false(self) -> bool:
        """True if every value of a single-column boolean batch is False.

        Raises:
            ValueError: if the batch does not have exactly one column.
        """
        return self._single_truth_value((~self._frames).all())

    def create_mask(self) -> List:
        """
        Return the index labels of the rows whose column `0` is True.
        """
        return self._frames[self._frames[0]].index.tolist()

    def create_inverted_mask(self) -> List:
        """
        Return the index labels of the rows whose column `0` is False.
        """
        return self._frames[~self._frames[0]].index.tolist()

    def update_indices(self, indices: List, other: "Batch"):
        """Overwrite the rows at positions `indices` with `other`'s frames."""
        self._frames.iloc[indices] = other._frames
        self._frames = pd.DataFrame(self._frames)

    def file_paths(self) -> Iterable:
        """Yield the values of the "file_path" column."""
        yield from self._frames["file_path"]

    def project(self, cols: Union[List[str], None]) -> "Batch":
        """
        Takes as input the column list, returns the projection.
        We do a copy for now.
        Every requested column must exist in the batch; None or an empty
        list yields a batch without columns.
        """
        cols = cols or []
        verified_cols = [c for c in cols if c in self._frames]
        unknown_cols = list(set(cols) - set(verified_cols))
        assert len(unknown_cols) == 0, unknown_cols
        return Batch(self._frames[verified_cols])

    @classmethod
    def merge_column_wise(
        cls, batches: List["Batch"], auto_renaming=False
    ) -> "Batch":
        """
        Merge list of batch frames column_wise and return a new batch frame
        Arguments:
            batches: List[Batch]: list of batch objects to be merged
            auto_renaming: currently unused; duplicated column names are
                only reported through a log warning

        Returns:
            Batch: Merged batch object
        """
        if not len(batches):
            return cls()

        frames = [batch.frames for batch in batches]
        # Rows are aligned on the index; gaps left by shorter frames are
        # forward-filled from the previous row.
        new_frames = pd.concat(frames, axis=1, copy=False, ignore_index=False).ffill()
        if new_frames.columns.duplicated().any():
            logger.warn("Duplicated column name detected {}".format(new_frames))
        return cls(new_frames)

    def __add__(self, other: "Batch") -> "Batch":
        """
        Adds two batch frames and return a batch frame
        Arguments:
            other (Batch): other framebatch to add

        Returns:
            Batch: the row-wise concatenation; when either operand is
            empty the other operand itself is returned (no copy).
        """
        if not isinstance(other, Batch):
            raise TypeError("Input should be of type Batch")

        # Appending a empty dataframe with column name leads to NaN row.
        if self.empty():
            return other
        if other.empty():
            return self

        return Batch.concat([self, other], copy=False)

    @classmethod
    def concat(cls, batch_list: Iterable["Batch"], copy=True) -> "Batch":
        """Concat a list of batches.
        Notice: only frames are considered.
        The resulting index is renumbered from zero.
        """

        # pd.concat will convert generator into list, so it does not hurt
        # if we convert ourselves.
        frame_list = [batch.frames for batch in batch_list]
        if not frame_list:
            return cls()
        frame = pd.concat(frame_list, ignore_index=True, copy=copy)

        return cls(frame)

    @classmethod
    def stack(cls, batch: "Batch", copy=True) -> "Batch":
        """Stack a given batch along the 0th dimension.
        Notice: input assumed to contain only one column with video frames
        The `copy` argument is currently unused.

        Returns:
            Batch (always of length 1)

        Raises:
            ValueError: if the batch has more than one column.
        """
        if len(batch.columns) > 1:
            raise ValueError("Stack can only be called on single-column batches")
        frame_data_col = batch.columns[0]

        stacked_array = np.array(batch.frames[frame_data_col].values.tolist())
        stacked_frame = pd.DataFrame([{frame_data_col: stacked_array}])

        return cls(stacked_frame)

    @classmethod
    def join(cls, first: "Batch", second: "Batch", how="inner") -> "Batch":
        """Join two batches on their row index.

        Arguments:
            how: join type forwarded to DataFrame.merge.
        """
        return cls(
            first._frames.merge(
                second._frames, left_index=True, right_index=True, how=how
            )
        )

    @classmethod
    def combine_batches(
        cls, first: "Batch", second: "Batch", expression: "ExpressionType"
    ) -> "Batch":
        """
        Creates Batch by combining two batches using some arithmetic expression.

        Supported expressions are ARITHMETIC_ADD, ARITHMETIC_SUBTRACT,
        ARITHMETIC_MULTIPLY and ARITHMETIC_DIVIDE; any other expression
        type yields None.
        """
        if expression == ExpressionType.ARITHMETIC_ADD:
            return cls(pd.DataFrame(first._frames + second._frames))
        elif expression == ExpressionType.ARITHMETIC_SUBTRACT:
            return cls(pd.DataFrame(first._frames - second._frames))
        elif expression == ExpressionType.ARITHMETIC_MULTIPLY:
            return cls(pd.DataFrame(first._frames * second._frames))
        elif expression == ExpressionType.ARITHMETIC_DIVIDE:
            return cls(pd.DataFrame(first._frames / second._frames))
        # Historical behaviour, kept for callers: unsupported operators
        # produce no batch.
        return None

    def reassign_indices_to_hash(self, indices) -> None:
        """
        Hash indices and replace the indices with those hash values.

        Each row's new index label is `hash()` of the tuple of its values in
        the `indices` columns. Python salts str hashes per process, so labels
        derived from strings are not stable across interpreter runs.
        """
        self._frames.index = self._frames[indices].apply(
            lambda x: hash(tuple(x)), axis=1
        )

    def aggregate(self, method: str) -> None:
        """
        Aggregate batch based on method (in place).
        Methods can be sum, count, min, max, mean

        Arguments:
            method: string with one of the five above options
        """
        self._frames = self._frames.agg([method])

    def empty(self):
        """Checks if the batch is empty
        Returns:
            True if the batch_size == 0
        """
        return len(self) == 0

    def unnest(self, cols: List[str] = None) -> None:
        """
        Unnest (explode) list-like cells of `cols` into separate rows and
        drop the rows that contain missing values. Defaults to all columns.
        """
        if cols is None:
            cols = list(self.columns)
        self._frames = self._frames.explode(cols)
        self._frames.dropna(inplace=True)

    def reverse(self) -> None:
        """Reverses the row order and renumbers the index from zero"""
        self._frames = self._frames[::-1]
        self._frames.reset_index(drop=True, inplace=True)

    def drop_zero(self, outcomes: "Batch") -> None:
        """Keep only the rows whose corresponding outcome is greater than zero."""
        self._frames = self._frames[(outcomes._frames > 0).to_numpy()]

    def reset_index(self):
        """Resets the index of the data frame in the batch"""
        self._frames.reset_index(drop=True, inplace=True)

    def modify_column_alias(self, alias: "Union[Alias, str]") -> None:
        """Prefix (or re-prefix) the column names with an alias, in place.

        Raises:
            RuntimeError: if the alias lists column names and their count
                differs from the number of columns in the batch.
        """
        # a, b, c -> table1.a, table1.b, table1.c
        # t1.a -> t2.a
        if isinstance(alias, str):
            alias = Alias(alias)
        new_col_names = []
        if len(alias.col_names):
            if len(self.columns) != len(alias.col_names):
                err_msg = (
                    f"Expected {len(alias.col_names)} columns {alias.col_names},"
                    f"got {len(self.columns)} columns {self.columns}."
                )
                raise RuntimeError(err_msg)
            new_col_names = [
                "{}.{}".format(alias.alias_name, col_name)
                for col_name in alias.col_names
            ]
        else:
            for col_name in self.columns:
                if "." in str(col_name):
                    new_col_names.append(
                        "{}.{}".format(alias.alias_name, str(col_name).split(".")[1])
                    )
                else:
                    new_col_names.append("{}.{}".format(alias.alias_name, col_name))

        self._frames.columns = new_col_names

    def drop_column_alias(self) -> None:
        """Strip the alias prefix from the column names, in place."""
        # table1.a, table1.b, table1.c -> a, b, c
        new_col_names = []
        for col_name in self.columns:
            # Non-string labels (e.g. the integer columns produced by the
            # comparison constructors) carry no alias and are kept as they are.
            if isinstance(col_name, str) and "." in col_name:
                new_col_names.append(col_name.split(".")[1])
            else:
                new_col_names.append(col_name)

        self._frames.columns = new_col_names

    def to_numpy(self):
        """Return the frames as a NumPy array."""
        return self._frames.to_numpy()

    def rename(self, columns) -> None:
        """Rename column names (in place); `columns` maps old to new names."""
        self._frames.rename(columns=columns, inplace=True)

1	# coding=utf-8
2	# Copyright 2018-2022 EVA
3	#
4	# Licensed under the Apache License, Version 2.0 (the "License");
5	# you may not use this file except in compliance with the License.
6	# You may obtain a copy of the License at
7	#
8	# http://www.apache.org/licenses/LICENSE-2.0
9	#
10	# Unless required by applicable law or agreed to in writing, software
11	# distributed under the License is distributed on an "AS IS" BASIS,
12	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	# See the License for the specific language governing permissions and
14	# limitations under the License.
15	from typing import Callable, Iterable, List, TypeVar, Union	1✔
16
17	import numpy as np	1✔
18	import pandas as pd	1✔
19
20	from eva.expression.abstract_expression import ExpressionType	1✔
21	from eva.parser.alias import Alias	1✔
22	from eva.utils.generic_utils import PickleSerializer	1✔
23	from eva.utils.logging_manager import logger	1✔
24
25	Batch = TypeVar("Batch")	1✔
26
27
28	class Batch:	1✔
29	"""
30	Data model used for storing a batch of frames.
31	Internally stored as a pandas DataFrame with columns
32	"id" and "data".
33	id: integer index of frame
34	data: frame as np.array
35
36	Arguments:
37	frames (DataFrame): pandas Dataframe holding frames data
38	"""
39
40	def __init__(self, frames=None):	1✔
41	self._frames = pd.DataFrame() if frames is None else frames	1✔
42	if not isinstance(self._frames, pd.DataFrame):	1✔
43	raise ValueError(
44	"Batch constructor not properly called.\n" "Expected pandas.DataFrame"
45	)
46
47	@property	1✔
48	def frames(self) -> pd.DataFrame:	1✔
49	return self._frames	1✔
50
51	def __len__(self):	1✔
52	return len(self._frames)	1✔
53
54	@property	1✔
55	def columns(self):	1✔
56	return list(self._frames.columns)	1✔
57
58	def column_as_numpy_array(self, column_name: str) -> np.ndarray:	1✔
59	"""Return a column as numpy array
60
61	Args:
62	column_name (str): the name of the required column
63
64	Returns:
65	numpy.ndarray: the column data as a numpy array
66	"""
67	return self._frames[column_name].to_numpy()	1✔
68
69	def serialize(self):	1✔
70	obj = {"frames": self._frames, "batch_size": len(self)}	1✔
71	return PickleSerializer.serialize(obj)	1✔
72
73	@classmethod	1✔
74	def deserialize(cls, data):	1✔
75	obj = PickleSerializer.deserialize(data)	1✔
76	return cls(frames=obj["frames"])	1✔
77
78	@classmethod	1✔
79	def from_eq(cls, batch1: Batch, batch2: Batch) -> Batch:	1✔
80	return Batch(pd.DataFrame(batch1.to_numpy() == batch2.to_numpy()))	1✔
81
82	@classmethod	1✔
83	def from_greater(cls, batch1: Batch, batch2: Batch) -> Batch:	1✔
84	return Batch(pd.DataFrame(batch1.to_numpy() > batch2.to_numpy()))	1✔
85
86	@classmethod	1✔
87	def from_lesser(cls, batch1: Batch, batch2: Batch) -> Batch:	1✔
88	return Batch(pd.DataFrame(batch1.to_numpy() < batch2.to_numpy()))	1✔
89
90	@classmethod	1✔
91	def from_greater_eq(cls, batch1: Batch, batch2: Batch) -> Batch:	1✔
92	return Batch(pd.DataFrame(batch1.to_numpy() >= batch2.to_numpy()))	1✔
93
94	@classmethod	1✔
95	def from_lesser_eq(cls, batch1: Batch, batch2: Batch) -> Batch:	1✔
96	return Batch(pd.DataFrame(batch1.to_numpy() <= batch2.to_numpy()))	1✔
97
98	@classmethod	1✔
99	def from_not_eq(cls, batch1: Batch, batch2: Batch) -> Batch:	1✔
100	return Batch(pd.DataFrame(batch1.to_numpy() != batch2.to_numpy()))	1✔
101
102	@classmethod	1✔
103	def compare_contains(cls, batch1: Batch, batch2: Batch) -> None:	1✔
104	return cls(	1✔
105	pd.DataFrame(
106	[all(x in p for x in q) for p, q in zip(left, right)]
107	for left, right in zip(batch1.to_numpy(), batch2.to_numpy())
108	)
109	)
110
111	@classmethod	1✔
112	def compare_is_contained(cls, batch1: Batch, batch2: Batch) -> None:	1✔
113	return cls(	1✔
114	pd.DataFrame(
115	[all(x in q for x in p) for p, q in zip(left, right)]
116	for left, right in zip(batch1.to_numpy(), batch2.to_numpy())
117	)
118	)
119
120	@classmethod	1✔
121	def compare_like(cls, batch1: Batch, batch2: Batch) -> None:	1✔
122	col = batch1._frames.iloc[:, 0]	1✔
123	regex = batch2._frames.iloc[:, 0][0]	1✔
124	return cls(pd.DataFrame(col.astype("str").str.match(pat=regex)))	1✔
125
126	def __str__(self) -> str:	1✔
127	with pd.option_context(	1✔
128	"display.pprint_nest_depth", 1, "display.max_colwidth", 100
129	):
130	return f"{self._frames}"	1✔
131
132	def __eq__(self, other: Batch):	1✔
133	# this function does not work if a column is a nested numpy arrays
134	# (eg, bboxes from yolo).
135	return self._frames[sorted(self.columns)].equals(	1✔
136	other.frames[sorted(other.columns)]
137	)
138
139	def __getitem__(self, indices) -> Batch:	1✔
140	"""
141	Returns a batch with the desired frames
142
143	Arguments:
144	indices (list, slice or mask): list must be
145	a list of indices; mask is boolean array-like
146	(i.e. list, NumPy array, DataFrame, etc.)
147	of appropriate size with True for desired frames.
148	"""
149	if isinstance(indices, list):	1✔
150	return self._get_frames_from_indices(indices)	1✔
151	elif isinstance(indices, slice):	1✔
152	start = indices.start if indices.start else 0	1✔
153	end = indices.stop if indices.stop else len(self.frames)	1✔
154	if end < 0:	1✔
155	end = len(self._frames) + end	1✔
156	step = indices.step if indices.step else 1	1✔
157	return self._get_frames_from_indices(range(start, end, step))	1✔
158	elif isinstance(indices, int):	1✔
159	return self._get_frames_from_indices([indices])	1✔
160	else:
161	raise TypeError("Invalid argument type: {}".format(type(indices)))
162
163	def _get_frames_from_indices(self, required_frame_ids):	1✔
164	new_frames = self._frames.iloc[required_frame_ids, :]	1✔
165	new_batch = Batch(new_frames)	1✔
166	return new_batch	1✔
167
168	def apply_function_expression(self, expr: Callable) -> Batch:	1✔
169	"""
170	Execute function expression on frames.
171	"""
172	return Batch(expr(self._frames))	1✔
173
174	def iterrows(self):	1✔
175	return self._frames.iterrows()	1✔
176
177	def sort(self, by=None) -> None:	1✔
178	"""
179	in_place sort
180	"""
181	if self.empty():	1✔
182	return	×
183	if by is None:	1✔
184	by = self.columns[0]	1✔
185	self._frames.sort_values(by=by, ignore_index=True, inplace=True)	1✔
186
187	def sort_orderby(self, by, sort_type=None) -> None:	1✔
188	"""
189	in_place sort for orderby
190
191	Args:
192	by: list of column names
193	sort_type: list of True/False if ASC for each column name in 'by'
194	i.e [True, False] means [ASC, DESC]
195	"""
196
197	if sort_type is None:	1✔
198	sort_type = [True]	1✔
199
200	assert by is not None	1✔
201	for column in by:	1✔
202	assert (	1✔
203	column in self._frames.columns
204	), "Can not orderby non-projected column: {}".format(column)
205
206	self._frames.sort_values(	1✔
207	by, ascending=sort_type, ignore_index=True, inplace=True
208	)
209
210	def invert(self) -> None:	1✔
211	self._frames = ~self._frames	1✔
212
213	def all_true(self) -> bool:	1✔
214	return self._frames.all().bool()	1✔
215
216	def all_false(self) -> bool:	1✔
217	inverted = ~self._frames	1✔
218	return inverted.all().bool()	1✔
219
220	def create_mask(self) -> List:	1✔
221	"""
222	Return list of indices of first row.
223	"""
224	return self._frames[self._frames[0]].index.tolist()	1✔
225
226	def create_inverted_mask(self) -> List:	1✔
227	return self._frames[~self._frames[0]].index.tolist()	1✔
228
229	def update_indices(self, indices: List, other: Batch):	1✔
230	self._frames.iloc[indices] = other._frames	1✔
231	self._frames = pd.DataFrame(self._frames)	1✔
232
233	def file_paths(self) -> Iterable:	1✔
234	yield from self._frames["file_path"]	1✔
235
236	def project(self, cols: None) -> Batch:	1✔
237	"""
238	Takes as input the column list, returns the projection.
239	We do a copy for now.
240	"""
241	cols = cols or []	1✔
242	verfied_cols = [c for c in cols if c in self._frames]	1✔
243	unknown_cols = list(set(cols) - set(verfied_cols))	1✔
244	assert len(unknown_cols) == 0, unknown_cols	1✔
245	return Batch(self._frames[verfied_cols])	1✔
246
247	@classmethod	1✔
248	def merge_column_wise(cls, batches: List[Batch], auto_renaming=False) -> Batch:	1✔
249	"""
250	Merge list of batch frames column_wise and return a new batch frame
251	Arguments:
252	batches: List[Batch]: lsit of batch objects to be merged
253	auto_renaming: if true rename column names if required
254
255	Returns:
256	Batch: Merged batch object
257	"""
258	if not len(batches):	1✔
259	return Batch()	1✔
260
261	frames = [batch.frames for batch in batches]	1✔
262	new_frames = pd.concat(frames, axis=1, copy=False, ignore_index=False).fillna(	1✔
263	method="ffill"
264	)
265	if new_frames.columns.duplicated().any():	1✔
266	logger.warn("Duplicated column name detected {}".format(new_frames))	1✔
267	return Batch(new_frames)	1✔
268
269	def __add__(self, other: Batch) -> Batch:	1✔
270	"""
271	Adds two batch frames and return a new batch frame
272	Arguments:
273	other (Batch): other framebatch to add
274
275	Returns:
276	Batch
277	"""
278	if not isinstance(other, Batch):	1✔
279	raise TypeError("Input should be of type Batch")
280
281	# Appending a empty dataframe with column name leads to NaN row.
282	if self.empty():	1✔
283	return other	1✔
284	if other.empty():	1✔
285	return self	1✔
286
287	return Batch.concat([self, other], copy=False)	1✔
288
289	@classmethod	1✔
290	def concat(cls, batch_list: Iterable[Batch], copy=True) -> Batch:	1✔
291	"""Concat a list of batches.
292	Notice: only frames are considered.
293	"""
294
295	# pd.concat will convert generator into list, so it does not hurt
296	# if we convert ourselves.
297	frame_list = list([batch.frames for batch in batch_list])	1✔
298	if len(frame_list) == 0:	1✔
299	return Batch()	1✔
300	frame = pd.concat(frame_list, ignore_index=True, copy=copy)	1✔
301
302	return Batch(frame)	1✔
303
304	@classmethod	1✔
305	def stack(cls, batch: Batch, copy=True) -> Batch:	1✔
306	"""Stack a given batch along the 0th dimension.
307	Notice: input assumed to contain only one column with video frames
308
309	Returns:
310	Batch (always of length 1)
311	"""
312	if len(batch.columns) > 1:	1✔
313	raise ValueError("Stack can only be called on single-column batches")
314	frame_data_col = batch.columns[0]	1✔
315
316	stacked_array = np.array(batch.frames[frame_data_col].values.tolist())	1✔
317	stacked_frame = pd.DataFrame([{frame_data_col: stacked_array}])	1✔
318
319	return Batch(stacked_frame)	1✔
320
321	@classmethod	1✔
322	def join(cls, first: Batch, second: Batch, how="inner") -> Batch:	1✔
323	return cls(	1✔
324	first._frames.merge(
325	second._frames, left_index=True, right_index=True, how=how
326	)
327	)
328
329	@classmethod	1✔
330	def combine_batches(	1✔
331	cls, first: Batch, second: Batch, expression: ExpressionType
332	) -> Batch:
333	"""
334	Creates Batch by combining two batches using some arithmetic expression.
335	"""
336	if expression == ExpressionType.ARITHMETIC_ADD:	1✔
337	return Batch(pd.DataFrame(first._frames + second._frames))	1✔
338	elif expression == ExpressionType.ARITHMETIC_SUBTRACT:	1✔
339	return Batch(pd.DataFrame(first._frames - second._frames))	1✔
340	elif expression == ExpressionType.ARITHMETIC_MULTIPLY:	1✔
341	return Batch(pd.DataFrame(first._frames * second._frames))	1✔
342	elif expression == ExpressionType.ARITHMETIC_DIVIDE:	1✔
343	return Batch(pd.DataFrame(first._frames / second._frames))	1✔
344
345	def reassign_indices_to_hash(self, indices) -> None:	1✔
346	"""
347	Hash indices and replace the indices with those hash values.
348	"""
349	self._frames.index = self._frames[indices].apply(	1✔
350	lambda x: hash(tuple(x)), axis=1
351	)
352
353	def aggregate(self, method: str) -> None:	1✔
354	"""
355	Aggregate batch based on method.
356	Methods can be sum, count, min, max, mean
357
358	Arguments:
359	method: string with one of the five above options
360	"""
361	self._frames = self._frames.agg([method])	1✔
362
363	def empty(self):	1✔
364	"""Checks if the batch is empty
365	Returns:
366	True if the batch_size == 0
367	"""
368	return len(self) == 0	1✔
369
370	def unnest(self, cols: List[str] = None) -> None:	1✔
371	"""
372	Unnest columns and drop columns with no data
373	"""
374	if cols is None:	1✔
375	cols = list(self.columns)	×
376	self._frames = self._frames.explode(cols)	1✔
377	self._frames.dropna(inplace=True)	1✔
378
379	def reverse(self) -> None:	1✔
380	"""Reverses dataframe"""
381	self._frames = self._frames[::-1]	1✔
382	self._frames.reset_index(drop=True, inplace=True)	1✔
383
384	def drop_zero(self, outcomes: Batch) -> None:	1✔
385	"""Drop all columns with corresponding outcomes containing zero."""
386	self._frames = self._frames[(outcomes._frames > 0).to_numpy()]	1✔
387
388	def reset_index(self):	1✔
389	"""Resets the index of the data frame in the batch"""
390	self._frames.reset_index(drop=True, inplace=True)	1✔
391
392	def modify_column_alias(self, alias: Union[Alias, str]) -> None:	1✔
393	# a, b, c -> table1.a, table1.b, table1.c
394	# t1.a -> t2.a
395	if isinstance(alias, str):	1✔
396	alias = Alias(alias)	1✔
397	new_col_names = []	1✔
398	if len(alias.col_names):	1✔
399	if len(self.columns) != len(alias.col_names):	1✔
400	err_msg = (	1✔
401	f"Expected {len(alias.col_names)} columns {alias.col_names},"
402	f"got {len(self.columns)} columns {self.columns}."
403	)
404	raise RuntimeError(err_msg)
405	new_col_names = [	1✔
406	"{}.{}".format(alias.alias_name, col_name)
407	for col_name in alias.col_names
408	]
409	else:
410	for col_name in self.columns:	1✔
411	if "." in str(col_name):	1✔
412	new_col_names.append(	1✔
413	"{}.{}".format(alias.alias_name, str(col_name).split(".")[1])
414	)
415	else:
416	new_col_names.append("{}.{}".format(alias.alias_name, col_name))	1✔
417
418	self._frames.columns = new_col_names	1✔
419
420	def drop_column_alias(self) -> None:	1✔
421	# table1.a, table1.b, table1.c -> a, b, c
422	new_col_names = []	1✔
423	for col_name in self.columns:	1✔
424	if "." in col_name:	1✔
425	new_col_names.append(col_name.split(".")[1])	1✔
426	else:
427	new_col_names.append(col_name)	1✔
428
429	self._frames.columns = new_col_names	1✔
430
431	def to_numpy(self):	1✔
432	return self._frames.to_numpy()	1✔
433
434	def rename(self, columns) -> None:	1✔
435	"Rename column names"
436	self._frames.rename(columns=columns, inplace=True)	1✔

georgia-tech-db / eva / 8ac704ce-924d-4415-96d0-a7a53cd460d1

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous