• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

georgia-tech-db / eva / 8ac704ce-924d-4415-96d0-a7a53cd460d1

pending completion
8ac704ce-924d-4415-96d0-a7a53cd460d1

Pull #566

circle-ci

xzdandy
Merge branch 'obj-tracking' of github.com:georgia-tech-db/eva into obj-tracking
Pull Request #566: feat: object tracking

155 of 155 new or added lines in 16 files covered. (100.0%)

9371 of 9588 relevant lines covered (97.74%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.01
/eva/models/storage/batch.py
1
# coding=utf-8
2
# Copyright 2018-2022 EVA
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
from typing import Callable, Iterable, List, TypeVar, Union
1✔
16

17
import numpy as np
1✔
18
import pandas as pd
1✔
19

20
from eva.expression.abstract_expression import ExpressionType
1✔
21
from eva.parser.alias import Alias
1✔
22
from eva.utils.generic_utils import PickleSerializer
1✔
23
from eva.utils.logging_manager import logger
1✔
24

25
# Bound before the class statement so that the bare name ``Batch`` already
# resolves while the class body below is being evaluated; the ``class Batch``
# statement that follows rebinds this name to the class itself.
Batch = TypeVar("Batch")
1✔
26

27

28
class Batch:
    """
    Data model used for storing a batch of frames.

    A thin wrapper around a single pandas DataFrame (``self._frames``).
    Most mutating helpers operate on that DataFrame in place; the
    classmethods build new ``Batch`` objects.

    Arguments:
        frames (DataFrame): pandas DataFrame holding the batch data. When
            omitted, an empty DataFrame is used.

    Raises:
        ValueError: if ``frames`` is given but is not a pandas DataFrame.
    """

    def __init__(self, frames=None):
        self._frames = pd.DataFrame() if frames is None else frames
        if not isinstance(self._frames, pd.DataFrame):
            raise ValueError(
                "Batch constructor not properly called.\n" "Expected pandas.DataFrame"
            )

    @property
    def frames(self) -> pd.DataFrame:
        """The underlying DataFrame (not a copy)."""
        return self._frames

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self._frames)

    @property
    def columns(self):
        """Column labels of the underlying DataFrame, as a new list."""
        return list(self._frames.columns)

    def column_as_numpy_array(self, column_name: str) -> np.ndarray:
        """Return a column as numpy array

        Args:
            column_name (str): the name of the required column

        Returns:
            numpy.ndarray: the column data as a numpy array
        """
        return self._frames[column_name].to_numpy()

    def serialize(self):
        """Serialize the frames (plus the row count) with PickleSerializer."""
        obj = {"frames": self._frames, "batch_size": len(self)}
        return PickleSerializer.serialize(obj)

    @classmethod
    def deserialize(cls, data):
        """Rebuild a batch from the output of :meth:`serialize`.

        NOTE(review): PickleSerializer is presumably pickle-based; if so,
        only feed it data from a trusted source -- confirm against the helper.
        """
        obj = PickleSerializer.deserialize(data)
        return cls(frames=obj["frames"])

    # ------------------------------------------------------------------
    # Element-wise comparisons. Each one compares the raw numpy values of
    # the two batches and wraps the boolean result in a new Batch whose
    # columns are the default integer labels (0, 1, ...).
    # ------------------------------------------------------------------

    @classmethod
    def from_eq(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Element-wise ``batch1 == batch2``."""
        return Batch(pd.DataFrame(batch1.to_numpy() == batch2.to_numpy()))

    @classmethod
    def from_greater(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Element-wise ``batch1 > batch2``."""
        return Batch(pd.DataFrame(batch1.to_numpy() > batch2.to_numpy()))

    @classmethod
    def from_lesser(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Element-wise ``batch1 < batch2``."""
        return Batch(pd.DataFrame(batch1.to_numpy() < batch2.to_numpy()))

    @classmethod
    def from_greater_eq(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Element-wise ``batch1 >= batch2``."""
        return Batch(pd.DataFrame(batch1.to_numpy() >= batch2.to_numpy()))

    @classmethod
    def from_lesser_eq(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Element-wise ``batch1 <= batch2``."""
        return Batch(pd.DataFrame(batch1.to_numpy() <= batch2.to_numpy()))

    @classmethod
    def from_not_eq(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Element-wise ``batch1 != batch2``."""
        return Batch(pd.DataFrame(batch1.to_numpy() != batch2.to_numpy()))

    @classmethod
    def compare_contains(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Cell-wise test that every element of the ``batch2`` cell is in
        the matching ``batch1`` cell.

        Cells are paired positionally (row by row, then column by column);
        each cell is expected to be a container supporting ``in``.
        """
        return cls(
            pd.DataFrame(
                [all(x in p for x in q) for p, q in zip(left, right)]
                for left, right in zip(batch1.to_numpy(), batch2.to_numpy())
            )
        )

    @classmethod
    def compare_is_contained(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Cell-wise test that every element of the ``batch1`` cell is in
        the matching ``batch2`` cell (the mirror of :meth:`compare_contains`).
        """
        return cls(
            pd.DataFrame(
                [all(x in q for x in p) for p, q in zip(left, right)]
                for left, right in zip(batch1.to_numpy(), batch2.to_numpy())
            )
        )

    @classmethod
    def compare_like(cls, batch1: "Batch", batch2: "Batch") -> "Batch":
        """Regex-match the first column of ``batch1`` against a pattern.

        The pattern is the top-left cell of ``batch2``. Values are cast to
        ``str`` and matched with ``Series.str.match`` (anchored at the start).
        """
        col = batch1._frames.iloc[:, 0]
        # Positional lookup: a label lookup (``[0]``) raises KeyError when
        # the pattern frame's index does not contain the label 0.
        regex = batch2._frames.iloc[0, 0]
        return cls(pd.DataFrame(col.astype("str").str.match(pat=regex)))

    def __str__(self) -> str:
        with pd.option_context(
            "display.pprint_nest_depth", 1, "display.max_colwidth", 100
        ):
            return f"{self._frames}"

    def __eq__(self, other: "Batch"):
        # Comparing against something that is not a Batch is simply
        # "not equal" rather than an AttributeError.
        if not isinstance(other, Batch):
            return NotImplemented
        # this function does not work if a column is a nested numpy arrays
        # (eg, bboxes from yolo).
        return self._frames[sorted(self.columns)].equals(
            other.frames[sorted(other.columns)]
        )

    def __getitem__(self, indices) -> "Batch":
        """
        Returns a batch with the desired frames

        Arguments:
            indices (list, slice or int): a list is handed to
                ``DataFrame.iloc`` unchanged (so a list of positions works);
                a slice follows normal Python slice semantics (negative
                bounds, negative step, out-of-range bounds are clamped);
                an int (or numpy integer) selects a single row.

        Raises:
            TypeError: for any other argument type.
        """
        if isinstance(indices, list):
            return self._get_frames_from_indices(indices)
        if isinstance(indices, slice):
            # slice.indices() normalises None / negative / out-of-range
            # bounds exactly like list slicing does.
            positions = range(*indices.indices(len(self._frames)))
            return self._get_frames_from_indices(list(positions))
        if isinstance(indices, (int, np.integer)):
            return self._get_frames_from_indices([indices])
        raise TypeError("Invalid argument type: {}".format(type(indices)))

    def _get_frames_from_indices(self, required_frame_ids):
        """Select rows by position and wrap them in a new Batch."""
        new_frames = self._frames.iloc[required_frame_ids, :]
        new_batch = Batch(new_frames)
        return new_batch

    def apply_function_expression(self, expr: Callable) -> "Batch":
        """
        Execute function expression on frames.
        """
        return Batch(expr(self._frames))

    def iterrows(self):
        """Iterate over ``(index, row)`` pairs of the underlying DataFrame."""
        return self._frames.iterrows()

    def sort(self, by=None) -> None:
        """
        in_place sort

        Arguments:
            by: column (or columns) to sort on; defaults to the first column.
                An empty batch is left untouched.
        """
        if self.empty():
            return
        if by is None:
            by = self.columns[0]
        self._frames.sort_values(by=by, ignore_index=True, inplace=True)

    def sort_orderby(self, by, sort_type=None) -> None:
        """
        in_place sort for orderby

        Args:
            by: list of column names
            sort_type: list of True/False if ASC for each column name in 'by'
                i.e [True, False] means [ASC, DESC]. Defaults to ascending
                for every column in 'by'.

        Raises:
            AssertionError: if 'by' is None or names a column that is not
                present in the batch.
        """
        assert by is not None
        for column in by:
            assert (
                column in self._frames.columns
            ), "Can not orderby non-projected column: {}".format(column)

        if sort_type is None:
            # One flag per sort key; a single [True] makes pandas reject
            # multi-column sorts with a length-mismatch error.
            sort_type = [True] * len(by)

        self._frames.sort_values(
            by, ascending=sort_type, ignore_index=True, inplace=True
        )

    def invert(self) -> None:
        """Replace the frames with their element-wise ``~`` (logical not)."""
        self._frames = ~self._frames

    def all_true(self) -> bool:
        """True when every value in the batch is truthy."""
        # Reduce per column, then across columns. Series.bool() is
        # deprecated in recent pandas releases.
        return bool(self._frames.all().all())

    def all_false(self) -> bool:
        """True when every value in the inverted (``~``) batch is truthy."""
        inverted = ~self._frames
        return bool(inverted.all().all())

    def create_mask(self) -> List:
        """
        Return the index labels of the rows whose value in column ``0``
        is True.

        The column label is the integer 0, i.e. the batch is expected to be
        a boolean result such as the ones built by the ``from_*`` helpers.
        """
        return self._frames[self._frames[0]].index.tolist()

    def create_inverted_mask(self) -> List:
        """
        Return the index labels of the rows whose value in column ``0``
        is False (the complement of :meth:`create_mask`).
        """
        return self._frames[~self._frames[0]].index.tolist()

    def update_indices(self, indices: List, other: "Batch"):
        """Overwrite the rows at positions ``indices`` with ``other``'s frames."""
        self._frames.iloc[indices] = other._frames
        self._frames = pd.DataFrame(self._frames)

    def file_paths(self) -> Iterable:
        """Yield the values of the ``file_path`` column."""
        yield from self._frames["file_path"]

    def project(self, cols: List[str]) -> "Batch":
        """
        Takes as input the column list, returns the projection.
        We do a copy for now.

        Raises:
            AssertionError: if any requested column is not in the batch; the
                unknown names are attached to the error.
        """
        cols = cols or []
        verified_cols = [c for c in cols if c in self._frames]
        unknown_cols = list(set(cols) - set(verified_cols))
        assert len(unknown_cols) == 0, unknown_cols
        return Batch(self._frames[verified_cols])

    @classmethod
    def merge_column_wise(cls, batches: List["Batch"], auto_renaming=False) -> "Batch":
        """
        Merge list of batch frames column_wise and return a new batch frame

        Rows are aligned on the index; holes left by shorter frames are
        forward-filled from the previous row.

        Arguments:
            batches: List[Batch]: list of batch objects to be merged
            auto_renaming: currently unused by this method

        Returns:
            Batch: Merged batch object
        """
        if not len(batches):
            return Batch()

        frames = [batch.frames for batch in batches]
        # DataFrame.ffill() is the supported spelling of the deprecated
        # fillna(method="ffill").
        new_frames = pd.concat(
            frames, axis=1, copy=False, ignore_index=False
        ).ffill()
        duplicated = new_frames.columns.duplicated()
        if duplicated.any():
            # Report only the offending names instead of dumping the whole
            # (potentially huge) DataFrame into the log.
            logger.warn(
                "Duplicated column name detected {}".format(
                    list(new_frames.columns[duplicated])
                )
            )
        return Batch(new_frames)

    def __add__(self, other: "Batch") -> "Batch":
        """
        Adds two batch frames and return a new batch frame
        Arguments:
            other (Batch): other framebatch to add

        Returns:
            Batch: ``other`` / ``self`` itself when one side is empty,
            otherwise a new row-wise concatenation.

        Raises:
            TypeError: if ``other`` is not a Batch.
        """
        if not isinstance(other, Batch):
            raise TypeError("Input should be of type Batch")

        # Appending a empty dataframe with column name leads to NaN row.
        if self.empty():
            return other
        if other.empty():
            return self

        return Batch.concat([self, other], copy=False)

    @classmethod
    def concat(cls, batch_list: Iterable["Batch"], copy=True) -> "Batch":
        """Concat a list of batches.
        Notice: only frames are considered.

        Rows are stacked in order and the index is renumbered from 0. An
        empty input yields an empty Batch.
        """

        # pd.concat will convert generator into list, so it does not hurt
        # if we convert ourselves.
        frame_list = [batch.frames for batch in batch_list]
        if len(frame_list) == 0:
            return Batch()
        frame = pd.concat(frame_list, ignore_index=True, copy=copy)

        return Batch(frame)

    @classmethod
    def stack(cls, batch: "Batch", copy=True) -> "Batch":
        """Stack a given batch along the 0th dimension.
        Notice: input assumed to contain only one column with video frames

        Arguments:
            batch (Batch): single-column batch to stack
            copy: currently unused by this method

        Returns:
            Batch (always of length 1)

        Raises:
            ValueError: if the batch has more than one column.
        """
        if len(batch.columns) > 1:
            raise ValueError("Stack can only be called on single-column batches")
        frame_data_col = batch.columns[0]

        stacked_array = np.array(batch.frames[frame_data_col].values.tolist())
        stacked_frame = pd.DataFrame([{frame_data_col: stacked_array}])

        return Batch(stacked_frame)

    @classmethod
    def join(cls, first: "Batch", second: "Batch", how="inner") -> "Batch":
        """Join two batches on their index using ``DataFrame.merge``."""
        return cls(
            first._frames.merge(
                second._frames, left_index=True, right_index=True, how=how
            )
        )

    @classmethod
    def combine_batches(
        cls, first: "Batch", second: "Batch", expression: "ExpressionType"
    ) -> "Batch":
        """
        Creates Batch by combining two batches using some arithmetic expression.

        Supported expressions are ARITHMETIC_ADD, ARITHMETIC_SUBTRACT,
        ARITHMETIC_MULTIPLY and ARITHMETIC_DIVIDE.

        Returns:
            Batch with the element-wise result, or None when ``expression``
            is none of the supported types.
        """
        if expression == ExpressionType.ARITHMETIC_ADD:
            return Batch(pd.DataFrame(first._frames + second._frames))
        elif expression == ExpressionType.ARITHMETIC_SUBTRACT:
            return Batch(pd.DataFrame(first._frames - second._frames))
        elif expression == ExpressionType.ARITHMETIC_MULTIPLY:
            return Batch(pd.DataFrame(first._frames * second._frames))
        elif expression == ExpressionType.ARITHMETIC_DIVIDE:
            return Batch(pd.DataFrame(first._frames / second._frames))
        # NOTE(review): unsupported expression types silently yield None;
        # kept as-is because callers may rely on it.
        return None

    def reassign_indices_to_hash(self, indices) -> None:
        """
        Hash indices and replace the indices with those hash values.

        For every row, the values of the ``indices`` columns are packed into
        a tuple and hashed; the result becomes that row's index label.
        """
        self._frames.index = self._frames[indices].apply(
            lambda x: hash(tuple(x)), axis=1
        )

    def aggregate(self, method: str) -> None:
        """
        Aggregate batch based on method.
        Methods can be sum, count, min, max, mean

        Arguments:
            method: string with one of the five above options
        """
        self._frames = self._frames.agg([method])

    def empty(self):
        """Checks if the batch is empty
        Returns:
            True if the batch_size == 0
        """
        return len(self) == 0

    def unnest(self, cols: List[str] = None) -> None:
        """
        Unnest columns and drop columns with no data

        Arguments:
            cols: columns to explode; defaults to every column. Rows that
                contain a missing value after exploding are dropped.
        """
        if cols is None:
            cols = list(self.columns)
        self._frames = self._frames.explode(cols)
        self._frames.dropna(inplace=True)

    def reverse(self) -> None:
        """Reverses dataframe"""
        self._frames = self._frames[::-1]
        self._frames.reset_index(drop=True, inplace=True)

    def drop_zero(self, outcomes: "Batch") -> None:
        """Keep only the rows whose corresponding outcome is greater than zero."""
        self._frames = self._frames[(outcomes._frames > 0).to_numpy()]

    def reset_index(self):
        """Resets the index of the data frame in the batch"""
        self._frames.reset_index(drop=True, inplace=True)

    def modify_column_alias(self, alias: "Union[Alias, str]") -> None:
        """Prefix (or re-prefix) every column name with the alias name.

        Raises:
            RuntimeError: if the alias carries explicit column names whose
                count differs from the number of columns in the batch.
        """
        # a, b, c -> table1.a, table1.b, table1.c
        # t1.a -> t2.a
        if isinstance(alias, str):
            alias = Alias(alias)
        new_col_names = []
        if len(alias.col_names):
            if len(self.columns) != len(alias.col_names):
                err_msg = (
                    f"Expected {len(alias.col_names)} columns {alias.col_names},"
                    f"got {len(self.columns)} columns {self.columns}."
                )
                raise RuntimeError(err_msg)
            new_col_names = [
                "{}.{}".format(alias.alias_name, col_name)
                for col_name in alias.col_names
            ]
        else:
            for col_name in self.columns:
                if "." in str(col_name):
                    new_col_names.append(
                        "{}.{}".format(alias.alias_name, str(col_name).split(".")[1])
                    )
                else:
                    new_col_names.append("{}.{}".format(alias.alias_name, col_name))

        self._frames.columns = new_col_names

    def drop_column_alias(self) -> None:
        """Strip the ``alias.`` prefix from every column name, in place."""
        # table1.a, table1.b, table1.c -> a, b, c
        new_col_names = []
        for col_name in self.columns:
            # Non-string labels (e.g. the integer labels of comparison
            # results) carry no alias and are kept unchanged.
            if isinstance(col_name, str) and "." in col_name:
                new_col_names.append(col_name.split(".")[1])
            else:
                new_col_names.append(col_name)

        self._frames.columns = new_col_names

    def to_numpy(self):
        """Return the underlying DataFrame as a numpy array."""
        return self._frames.to_numpy()

    def rename(self, columns) -> None:
        "Rename column names"
        self._frames.rename(columns=columns, inplace=True)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc