#758

Committed 04 Sep 2023 08:37PM UTC coverage: 0.0% (-78.3%) from 78.333%

Build # #758

Build Type

push

circle-ci

Committed by

hershd23

Commit Message

Increased underline length in at line 75 in text_summarization.rst
	modified:   docs/source/benchmarks/text_summarization.rst

Run Details

0 of 11303 relevant lines covered (0.0%)

0.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/evadb/executor/orderby_executor.py

# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Iterator

from evadb.database import EvaDBDatabase
from evadb.executor.abstract_executor import AbstractExecutor
from evadb.executor.executor_utils import ExecutorError
from evadb.expression.function_expression import FunctionExpression
from evadb.expression.tuple_value_expression import TupleValueExpression
from evadb.models.storage.batch import Batch
from evadb.parser.types import ParserOrderBySortType
from evadb.plan_nodes.orderby_plan import OrderByPlan


class OrderByExecutor(AbstractExecutor):
    """
    Order the rows produced by the child executor.

    Every batch yielded by the child is collected and concatenated into a
    single batch, any ORDER BY expression whose output column is missing is
    evaluated and merged in, the combined batch is sorted, and the result is
    yielded back in chunks whose sizes match the incoming batches.

    Arguments:
        db (EvaDBDatabase): database handle forwarded to AbstractExecutor
        node (OrderByPlan): The OrderBy Plan; supplies ``orderby_list``,
            ``columns`` and ``sort_types``

    """

    def __init__(self, db: EvaDBDatabase, node: OrderByPlan):
        super().__init__(db, node)
        self._orderby_list = node.orderby_list
        self._columns = node.columns
        self._sort_types = node.sort_types
        # Sizes of the batches received from the child during the most
        # recent call to exec(); used to re-chunk the sorted output.
        self.batch_sizes = []

    def _extract_column_name(self, col):
        """Return the output column name(s) of one ORDER BY expression.

        Arguments:
            col: a TupleValueExpression or a FunctionExpression

        Returns:
            list: the alias of a TupleValueExpression wrapped in a list, or
            the elements of a FunctionExpression's ``col_alias`` (which is
            extended into the result, so it is presumably a list of names)

        Raises:
            ExecutorError: if ``col`` is any other expression type
        """
        if isinstance(col, TupleValueExpression):
            return [col.col_alias]
        if isinstance(col, FunctionExpression):
            return list(col.col_alias)
        raise ExecutorError(
            "Expression type {} is not supported.".format(type(col))
        )

    def extract_column_names(self):
        """extracts the string name of every ORDER BY column, in plan order"""
        # self._columns: List[TupleValueExpression]
        col_name_list = []
        for col in self._columns:
            col_name_list += self._extract_column_name(col)
        return col_name_list

    def extract_sort_types(self):
        """extracts the sort type for the column

        Returns:
            list: one bool per entry of ``self._sort_types``; True for
            ParserOrderBySortType.ASC, False for anything else
        """
        # self._sort_types: List[ParserOrderBySortType]
        return [st is ParserOrderBySortType.ASC for st in self._sort_types]

    def exec(self, *args, **kwargs) -> Iterator[Batch]:
        """Yield the child's rows in sorted order.

        Arguments:
            *args: accepted for interface compatibility; not used here
            **kwargs: forwarded unchanged to the child executor's exec()

        Yields:
            Batch: consecutive slices of the sorted data, one per batch
            received from the child and of the same length
        """
        child_executor = self.children[0]
        aggregated_batch_list = []

        # Start from a fresh list so that sizes recorded by a previous call
        # (including one that returned early on empty input) cannot leak
        # into the re-chunking step below.
        self.batch_sizes = []

        # aggregates the batches into one large batch
        for batch in child_executor.exec(**kwargs):
            self.batch_sizes.append(len(batch))
            aggregated_batch_list.append(batch)
        aggregated_batch = Batch.concat(aggregated_batch_list, copy=False)

        # nothing to order by
        if not len(aggregated_batch):
            return

        # Column can be a functional expression, so if it
        # is not in columns, it needs to be re-evaluated.
        merge_batch_list = [aggregated_batch]
        for col in self._columns:
            col_name_list = self._extract_column_name(col)
            # Evaluate each expression at most once: an expression with
            # several missing output names would otherwise be evaluated
            # repeatedly and its result merged in more than once.
            if any(
                col_name not in aggregated_batch.columns
                for col_name in col_name_list
            ):
                merge_batch_list.append(col.evaluate(aggregated_batch))
        if len(merge_batch_list) > 1:
            aggregated_batch = Batch.merge_column_wise(merge_batch_list)

        # sorts the batch
        # NOTE(review): the return value is ignored, so sort_orderby is
        # presumably in-place -- confirm against Batch.
        try:
            aggregated_batch.sort_orderby(
                by=self.extract_column_names(),
                sort_type=self.extract_sort_types(),
            )
        except KeyError:
            # Deliberately best-effort: when a sort key is missing from the
            # batch the data is emitted in its current order instead of
            # failing the query.
            pass

        # split the aggregated batch into smaller ones based
        #  on self.batch_sizes which holds the input batches sizes
        index = 0
        for size in self.batch_sizes:
            batch = aggregated_batch[index : index + size]
            batch.reset_index()
            index += size
            yield batch

1	# coding=utf-8
2	# Copyright 2018-2023 EvaDB
3	#
4	# Licensed under the Apache License, Version 2.0 (the "License");
5	# you may not use this file except in compliance with the License.
6	# You may obtain a copy of the License at
7	#
8	# http://www.apache.org/licenses/LICENSE-2.0
9	#
10	# Unless required by applicable law or agreed to in writing, software
11	# distributed under the License is distributed on an "AS IS" BASIS,
12	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	# See the License for the specific language governing permissions and
14	# limitations under the License.
15	from typing import Iterator	×
16
17	from evadb.database import EvaDBDatabase	×
18	from evadb.executor.abstract_executor import AbstractExecutor	×
19	from evadb.executor.executor_utils import ExecutorError	×
20	from evadb.expression.function_expression import FunctionExpression	×
21	from evadb.expression.tuple_value_expression import TupleValueExpression	×
22	from evadb.models.storage.batch import Batch	×
23	from evadb.parser.types import ParserOrderBySortType	×
24	from evadb.plan_nodes.orderby_plan import OrderByPlan	×
25
26
27	class OrderByExecutor(AbstractExecutor):	×
28	"""
29	Sort the frames which satisfy the condition
30
31	Arguments:
32	node (AbstractPlan): The OrderBy Plan
33
34	"""
35
36	def __init__(self, db: EvaDBDatabase, node: OrderByPlan):	×
37	super().__init__(db, node)	×
38	self._orderby_list = node.orderby_list	×
39	self._columns = node.columns	×
40	self._sort_types = node.sort_types	×
41	self.batch_sizes = []	×
42
43	def _extract_column_name(self, col):	×
44	col_name = []	×
45	if isinstance(col, TupleValueExpression):	×
46	col_name += [col.col_alias]	×
47	elif isinstance(col, FunctionExpression):	×
48	col_name += col.col_alias	×
49	else:
50	raise ExecutorError(
51	"Expression type {} is not supported.".format(type(col))
52	)
53	return col_name	×
54
55	def extract_column_names(self):	×
56	"""extracts the string name of the column"""
57	# self._columns: List[TupleValueExpression]
58	col_name_list = []	×
59	for col in self._columns:	×
60	col_name_list += self._extract_column_name(col)	×
61	return col_name_list	×
62
63	def extract_sort_types(self):	×
64	"""extracts the sort type for the column"""
65	# self._sort_types: List[ParserOrderBySortType]
66	sort_type_bools = []	×
67	for st in self._sort_types:	×
68	if st is ParserOrderBySortType.ASC:	×
69	sort_type_bools.append(True)	×
70	else:
71	sort_type_bools.append(False)	×
72	return sort_type_bools	×
73
74	def exec(self, *args, **kwargs) -> Iterator[Batch]:	×
75	child_executor = self.children[0]	×
76	aggregated_batch_list = []	×
77
78	# aggregates the batches into one large batch
79	for batch in child_executor.exec(**kwargs):	×
80	self.batch_sizes.append(len(batch))	×
81	aggregated_batch_list.append(batch)	×
82	aggregated_batch = Batch.concat(aggregated_batch_list, copy=False)	×
83
84	# nothing to order by
85	if not len(aggregated_batch):	×
86	return	×
87
88	# Column can be a functional expression, so if it
89	# is not in columns, it needs to be re-evaluated.
90	merge_batch_list = [aggregated_batch]	×
91	for col in self._columns:	×
92	col_name_list = self._extract_column_name(col)	×
93	for col_name in col_name_list:	×
94	if col_name not in aggregated_batch.columns:	×
95	batch = col.evaluate(aggregated_batch)	×
96	merge_batch_list.append(batch)	×
97	if len(merge_batch_list) > 1:	×
98	aggregated_batch = Batch.merge_column_wise(merge_batch_list)	×
99
100	# sorts the batch
101	try:	×
102	aggregated_batch.sort_orderby(	×
103	by=self.extract_column_names(),
104	sort_type=self.extract_sort_types(),
105	)
106	except KeyError:
107	# raise ExecutorError(str(e))
108	pass
109
110	# split the aggregated batch into smaller ones based
111	# on self.batch_sizes which holds the input batches sizes
112	index = 0	×
113	for i in self.batch_sizes:	×
114	batch = aggregated_batch[index : index + i]	×
115	batch.reset_index()	×
116	index += i	×
117	yield batch	×

georgia-tech-db / eva / #758

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous