#837

Committed 19 Oct 2023 09:02AM UTC coverage: 0.0% (-78.6%) from 78.632%

Build # #837

Build Type

push

circle-ci

Committed by Andy Xu

Commit Message

Add CostEntry

Run Details

0 of 12416 relevant lines covered (0.0%)

0.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/evadb/functions/ndarray/array_count.py

# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas as pd

from evadb.functions.abstract.abstract_function import AbstractFunction


class ArrayCount(AbstractFunction):
    """Count occurrences of a search element inside each row's array."""

    @property
    def name(self) -> str:
        return "ArrayCount"

    def setup(self):
        pass

    def forward(self, frames: pd.DataFrame) -> pd.DataFrame:
        """
        It will return a count of search element for each tuple.
        The idea is to flatten the input array along the first dimension and
        count the search element in this flattened array.
        For example,
        a tuple of shape (3,4,5) will be flattened into three (4,5) elements.
        And the search key is expected to be of shape (4,5),
        else we throw an error.

        frames: DataFrame
            col1        col2
        0   ndarray1    search_key
        1   ndarray2    search_key

        out: DataFrame
            key_count
        0   int
        1   int

        Only the search key of the first row is used; it is applied to
        every row of the first column.

        Raises:
            ValueError: if `frames` does not have exactly two columns, or if
                a row array and the search key have incompatible shapes.
        """
        # sanity check
        if len(frames.columns) != 2:
            raise ValueError("input must contain exactly two columns")

        # Nothing to count in a frame without rows; return an empty result
        # instead of failing while looking up the search key.
        if frames.empty:
            return pd.DataFrame({"key_count": pd.Series(dtype="int64")})

        # Positional access: the frame's index need not start at 0 and the
        # column labels may be arbitrary (or even duplicated).
        search_element = frames.iloc[0, -1]

        counts = [
            self.count_in_row(row_val, search_element)
            for row_val in frames.iloc[:, 0]
        ]

        return pd.DataFrame({"key_count": counts})

    def count_in_row(self, row_val, search_element):
        """
        Count how many sub-arrays along the first axis of `row_val` are
        equal to `search_element`.

        Raises:
            ValueError: if `search_element` does not have exactly the shape
                of one sub-array of `row_val` (i.e. `row_val.shape[1:]`).
        """
        # change the row and search element to numpy array
        row_val = np.asarray(row_val)
        search_element = np.asarray(search_element)

        # The search element must match one slice of row_val exactly.
        # Comparing only the number of dimensions is not enough: a key of a
        # different shape could still broadcast and yield a wrong count.
        if (
            row_val.ndim - search_element.ndim != 1
            or row_val.shape[1:] != search_element.shape
        ):
            raise ValueError("inconsistent dimensions for row value and search element")

        result = row_val == search_element
        # A sub-array matches only if all of its entries match; reduce over
        # every axis except the first, then count the matching sub-arrays.
        # (Reducing over explicit axes also works for zero-size arrays,
        # where reshape(n, -1) would be ambiguous.)
        trailing_axes = tuple(range(1, result.ndim))
        return result.all(axis=trailing_axes).sum()

1	# coding=utf-8
2	# Copyright 2018-2023 EvaDB
3	#
4	# Licensed under the Apache License, Version 2.0 (the "License");
5	# you may not use this file except in compliance with the License.
6	# You may obtain a copy of the License at
7	#
8	# http://www.apache.org/licenses/LICENSE-2.0
9	#
10	# Unless required by applicable law or agreed to in writing, software
11	# distributed under the License is distributed on an "AS IS" BASIS,
12	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	# See the License for the specific language governing permissions and
14	# limitations under the License.
15	import numpy as np	×
16	import pandas as pd	×
17
18	from evadb.functions.abstract.abstract_function import AbstractFunction	×
19
20
21	class ArrayCount(AbstractFunction):	×
22	@property	×
23	def name(self) -> str:	×
24	return "ArrayCount"	×
25
26	def setup(self):	×
27	pass	×
28
29	def forward(self, frames: pd.DataFrame) -> pd.DataFrame:	×
30	"""
31	It will return a count of search element for each tuple.
32	The idea is to flatten the input array along the first dimension and
33	count the search element in this flattened array.
34	For example,
35	a tuple of shape (3,4,5) will be flattened into three (4,5) elements.
36	And the search key is expected to be of shape (4,5),
37	else we throw an error.
38
39	frames: DataFrame
40	col1 col2
41	0 ndarray1 search_key
42	1 ndarray2 search_key
43
44	out: DataFrame
45	count
46	0 int
47	1 int
48
49	"""
50	# sanity check
51	if len(frames.columns) != 2:	×
52	raise ValueError("input contains more than one column")
53
54	search_element = frames[frames.columns[-1]][0]	×
55	values = pd.DataFrame(frames[frames.columns[0]])	×
56
57	count_result = values.apply(	×
58	lambda x: self.count_in_row(x[0], search_element), axis=1
59	)
60
61	return pd.DataFrame({"key_count": count_result.values})	×
62
63	def count_in_row(self, row_val, search_element):	×
64	# change the row and search element to numpy array
65	row_val = np.array(row_val)	×
66	search_element = np.array(search_element)	×
67
68	# checks if dimension diff is one between
69	# row_val and search_element
70	if row_val.ndim - search_element.ndim != 1:	×
71	raise ValueError("inconsistent dimensions for row value and search element")
72
73	result = row_val == search_element	×
74	# reshape along the first dimension and then
75	# check how many time search element exists
76	return result.reshape(result.shape[0], -1).all(axis=1).sum()	×

georgia-tech-db / eva / #837

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous