b4a3b4d1-cb33-4fa9-8d9c-0174de8abd24

Committed 13 Sep 2023 08:11PM UTC coverage: 69.982% (-0.08%) from 70.065%

Build # b4a3b4d1-cb33-4fa9-8d9c-0174de8abd24

Build Type

push

circle-ci

Committed by

web-flow

Commit Message

Set the right output column type for forecast functions (#1108)

Currently, forecast functions only output the y value. This PR fixes the
bound output column, so forecasting functions also return the unique_id and
datestamp columns.

- [x] Fix binded output column object 
- [x] Fix forecast test cases
- [x] Add new binder testcases for binded output column object.

Run Details

26 of 26 new or added lines in 3 files covered. (100.0%)

8274 of 11823 relevant lines covered (69.98%)

0.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

44.37

/evadb/executor/create_function_executor.py

# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import os
import pickle
from pathlib import Path
from typing import Dict, List

import pandas as pd

from evadb.catalog.catalog_utils import get_metadata_properties
from evadb.catalog.models.function_catalog import FunctionCatalogEntry
from evadb.catalog.models.function_io_catalog import FunctionIOCatalogEntry
from evadb.catalog.models.function_metadata_catalog import FunctionMetadataCatalogEntry
from evadb.configuration.constants import (
    DEFAULT_TRAIN_TIME_LIMIT,
    EvaDB_INSTALLATION_DIR,
)
from evadb.database import EvaDBDatabase
from evadb.executor.abstract_executor import AbstractExecutor
from evadb.functions.decorators.utils import load_io_from_function_decorators
from evadb.models.storage.batch import Batch
from evadb.plan_nodes.create_function_plan import CreateFunctionPlan
from evadb.third_party.huggingface.create import gen_hf_io_catalog_entries
from evadb.utils.errors import FunctionIODefinitionError
from evadb.utils.generic_utils import (
    load_function_class_from_file,
    string_comparison_case_insensitive,
    try_to_import_forecast,
    try_to_import_ludwig,
    try_to_import_torch,
    try_to_import_ultralytics,
)
from evadb.utils.logging_manager import logger


class CreateFunctionExecutor(AbstractExecutor):
    """Executor for CREATE FUNCTION plans.

    Dispatches on the plan node's function type to resolve the implementation
    path, input/output definitions and metadata of the function, then inserts
    the resulting entry into the catalog.
    """

    def __init__(self, db: EvaDBDatabase, node: CreateFunctionPlan):
        super().__init__(db, node)
        # "functions" directory located under the EvaDB installation directory.
        self.function_dir = Path(EvaDB_INSTALLATION_DIR) / "functions"

    def _aggregate_child_batches(self, function_kind: str) -> Batch:
        """Run the single child plan and merge all of its batches into one.

        Args:
            function_kind (str): Label used in the assertion message
                (e.g. "ludwig", "forecasting").

        Returns:
            Batch: Concatenation of every batch produced by the child, with
            column aliases dropped.
        """
        assert (
            len(self.children) == 1
        ), "Create {} function expects 1 child, finds {}.".format(
            function_kind, len(self.children)
        )
        aggregated_batch = Batch.concat(list(self.children[0].exec()), copy=False)
        aggregated_batch.drop_column_alias()
        return aggregated_batch

    def handle_huggingface_function(self):
        """Handle HuggingFace functions

        HuggingFace functions are special functions that are not loaded from a file.
        So we do not need to call the setup method on them like we do for other functions.

        Returns:
            A tuple (name, impl_path, function_type, io_list, metadata).
        """
        # We need at least one deep learning framework for HuggingFace
        # Torch or Tensorflow
        try_to_import_torch()
        impl_path = f"{self.function_dir}/abstract/hf_abstract_function.py"
        io_list = gen_hf_io_catalog_entries(self.node.name, self.node.metadata)
        return (
            self.node.name,
            impl_path,
            self.node.function_type,
            io_list,
            self.node.metadata,
        )

    def handle_ludwig_function(self):
        """Handle ludwig functions

        Use Ludwig's auto_train engine to train/tune models on the data produced
        by the child plan, save the best model under the configured model
        directory, and record its location as "model_path" metadata on the node.

        Returns:
            A tuple (name, impl_path, function_type, io_list, metadata).
        """
        try_to_import_ludwig()
        from ludwig.automl import auto_train

        aggregated_batch = self._aggregate_child_batches("ludwig")

        arg_map = {arg.key: arg.value for arg in self.node.metadata}
        auto_train_results = auto_train(
            dataset=aggregated_batch.frames,
            target=arg_map["predict"],
            tune_for_memory=arg_map.get("tune_for_memory", False),
            time_limit_s=arg_map.get("time_limit", DEFAULT_TRAIN_TIME_LIMIT),
            output_directory=self.db.config.get_value("storage", "tmp_dir"),
        )
        model_path = os.path.join(
            self.db.config.get_value("storage", "model_dir"), self.node.name
        )
        auto_train_results.best_model.save(model_path)
        # NOTE: this appends to the plan node's own metadata list.
        self.node.metadata.append(
            FunctionMetadataCatalogEntry("model_path", model_path)
        )

        impl_path = Path(f"{self.function_dir}/ludwig.py").absolute().as_posix()
        io_list = self._resolve_function_io(None)
        return (
            self.node.name,
            impl_path,
            self.node.function_type,
            io_list,
            self.node.metadata,
        )

    def handle_ultralytics_function(self):
        """Handle Ultralytics functions

        Returns:
            A tuple (name, impl_path, function_type, io_list, metadata).
        """
        try_to_import_ultralytics()

        impl_path = (
            Path(f"{self.function_dir}/yolo_object_detector.py").absolute().as_posix()
        )
        function = self._try_initializing_function(
            impl_path, function_args=get_metadata_properties(self.node)
        )
        io_list = self._resolve_function_io(function)
        return (
            self.node.name,
            impl_path,
            self.node.function_type,
            io_list,
            self.node.metadata,
        )

    def handle_forecasting_function(self):
        """Handle forecasting functions

        Fits a statsforecast model on the data produced by the child plan and
        pickles it under ``<model_dir>/<function name>/<sha256 of data>.pkl``.
        If that file already exists the fit is skipped and the file is reused.

        Returns:
            A tuple (name, impl_path, function_type, io_list, metadata), where
            metadata holds the model name, the model path and the original
            names of the predict/time/id columns.

        Raises:
            RuntimeError: If the "predict" parameter is missing, the requested
                model is not supported, or the frequency is neither given nor
                inferable from the data.
        """
        # Check the optional dependency first so we fail before executing the
        # (potentially expensive) child plan.
        try_to_import_forecast()
        from statsforecast import StatsForecast
        from statsforecast.models import AutoARIMA, AutoCES, AutoETS, AutoTheta

        model_dict = {
            "AutoARIMA": AutoARIMA,
            "AutoCES": AutoCES,
            "AutoETS": AutoETS,
            "AutoTheta": AutoTheta,
        }

        arg_map = {arg.key: arg.value for arg in self.node.metadata}
        if "predict" not in arg_map:
            raise RuntimeError(
                f"Forecasting function {self.node.name} requires the 'predict' parameter."
            )

        model_name = arg_map.get("model", "AutoARIMA")
        if model_name not in model_dict:
            raise RuntimeError(
                f"Unsupported forecasting model '{model_name}' for {self.node.name}. "
                f"Supported models: {', '.join(model_dict)}."
            )

        if self.node.impl_path:
            impl_path = self.node.impl_path.absolute().as_posix()
        else:
            impl_path = Path(f"{self.function_dir}/forecast.py").absolute().as_posix()

        aggregated_batch = self._aggregate_child_batches("forecasting")

        # The following rename is needed for statsforecast, which requires the
        # column names to be the following:
        # - unique_id (string, int or category): an identifier for the series.
        # - ds (datestamp): should be of a format expected by Pandas, ideally
        #   YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp.
        # - y (numeric): the measurement we wish to forecast.
        # For reference: https://nixtla.github.io/statsforecast/docs/getting-started/getting_started_short.html
        aggregated_batch.rename(columns={arg_map["predict"]: "y"})
        if "time" in arg_map:
            aggregated_batch.rename(columns={arg_map["time"]: "ds"})
        if "id" in arg_map:
            aggregated_batch.rename(columns={arg_map["id"]: "unique_id"})

        data = aggregated_batch.frames
        # Without an id column, treat all rows as one series with id 1.
        if "unique_id" not in data.columns:
            data["unique_id"] = [1] * len(data)

        # Without a time column, fall back to a 1-based row counter.
        if "ds" not in data.columns:
            data["ds"] = list(range(1, len(data) + 1))

        if "frequency" in arg_map:
            frequency = arg_map["frequency"]
        else:
            frequency = pd.infer_freq(data["ds"])
        if frequency is None:
            raise RuntimeError(
                f"Can not infer the frequency for {self.node.name}. Please explictly set it."
            )

        season_dict = {  # https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases
            "H": 24,
            "M": 12,
            "Q": 4,
            "SM": 24,
            "BM": 12,
            "BMS": 12,
            "BQ": 4,
            "BH": 24,
        }

        # Shortens longer frequencies like Q-DEC to their base alias (Q).
        new_freq = frequency.split("-")[0]
        # Frequencies without a known season fall back to a season length of 1.
        season_length = season_dict.get(new_freq, 1)
        model = StatsForecast(
            [model_dict[model_name](season_length=season_length)], freq=new_freq
        )

        model_dir = os.path.join(
            self.db.config.get_value("storage", "model_dir"), self.node.name
        )
        Path(model_dir).mkdir(parents=True, exist_ok=True)

        # The file name is derived from the training data only (hashed before
        # the datetime conversion below).
        # NOTE(review): model name and frequency are not part of the key, so a
        # pickle left behind by an earlier function with the same name and
        # data would be reused even if the model differs - confirm that
        # dropping a function removes its model directory.
        data_digest = hashlib.sha256(data.to_string().encode()).hexdigest()
        model_path = os.path.join(model_dir, data_digest + ".pkl")

        data["ds"] = pd.to_datetime(data["ds"])
        if not Path(model_path).exists():
            model.fit(data)
            # Context manager guarantees the handle is closed even if pickling fails.
            with open(model_path, "wb") as f:
                pickle.dump(model, f)

        io_list = self._resolve_function_io(None)

        metadata_here = [
            FunctionMetadataCatalogEntry("model_name", model_name),
            FunctionMetadataCatalogEntry("model_path", model_path),
            FunctionMetadataCatalogEntry(
                "predict_column_rename", arg_map.get("predict", "y")
            ),
            FunctionMetadataCatalogEntry(
                "time_column_rename", arg_map.get("time", "ds")
            ),
            FunctionMetadataCatalogEntry(
                "id_column_rename", arg_map.get("id", "unique_id")
            ),
        ]

        return (
            self.node.name,
            impl_path,
            self.node.function_type,
            io_list,
            metadata_here,
        )

    def handle_generic_function(self):
        """Handle generic functions

        Generic functions are loaded from a file. We check for inputs passed by the user during CREATE or try to load io from decorators.

        Returns:
            A tuple (name, impl_path, function_type, io_list, metadata).
        """
        impl_path = self.node.impl_path.absolute().as_posix()
        function = self._try_initializing_function(impl_path)
        io_list = self._resolve_function_io(function)

        return (
            self.node.name,
            impl_path,
            self.node.function_type,
            io_list,
            self.node.metadata,
        )

    def exec(self, *args, **kwargs):
        """Create function executor

        Calls the catalog to insert a function catalog entry.

        Yields:
            Batch: A single batch holding a one-line status message.

        Raises:
            RuntimeError: If the function already exists and IF NOT EXISTS was
                not specified.
        """
        # check catalog if it already has this function entry
        if self.catalog().get_function_catalog_entry_by_name(self.node.name):
            if self.node.if_not_exists:
                msg = f"Function {self.node.name} already exists, nothing added."
                yield Batch(pd.DataFrame([msg]))
                return
            msg = f"Function {self.node.name} already exists."
            logger.error(msg)
            raise RuntimeError(msg)

        # Pick the handler for the function type; anything that is not one of
        # the special types is treated as a generic, file-backed function.
        function_type = self.node.function_type
        if string_comparison_case_insensitive(function_type, "HuggingFace"):
            # HuggingFace functions override the impl_path
            handler = self.handle_huggingface_function
        elif string_comparison_case_insensitive(function_type, "ultralytics"):
            handler = self.handle_ultralytics_function
        elif string_comparison_case_insensitive(function_type, "Ludwig"):
            handler = self.handle_ludwig_function
        elif string_comparison_case_insensitive(function_type, "Forecasting"):
            handler = self.handle_forecasting_function
        else:
            handler = self.handle_generic_function

        name, impl_path, function_type, io_list, metadata = handler()

        self.catalog().insert_function_catalog_entry(
            name, impl_path, function_type, io_list, metadata
        )
        yield Batch(
            pd.DataFrame(
                [f"Function {self.node.name} successfully added to the database."]
            )
        )

    def _try_initializing_function(
        self, impl_path: str, function_args: Dict = None
    ) -> FunctionCatalogEntry:
        """Attempts to initialize function given the implementation file path and arguments.

        Args:
            impl_path (str): The file path of the function implementation file.
            function_args (Dict, optional): Dictionary of keyword arguments to pass
                to the function constructor. Defaults to None, meaning no arguments.

        Returns:
            The function class loaded from the file. An instance is created once
            to verify that construction succeeds, but the class itself is returned.

        Raises:
            RuntimeError: If an error occurs while loading or initializing the function.
        """
        # Avoid a shared mutable default; None stands for "no arguments".
        if function_args is None:
            function_args = {}

        try:
            # loading the function class from the file
            function = load_function_class_from_file(impl_path, self.node.name)
            # initializing the function class calls the setup method internally
            function(**function_args)
        except Exception as e:
            err_msg = f"Error creating function: {str(e)}"
            # Chain the original exception so its traceback is preserved.
            raise RuntimeError(err_msg) from e

        return function

    def _resolve_function_io(
        self, function: FunctionCatalogEntry
    ) -> List[FunctionIOCatalogEntry]:
        """Private method that resolves the input/output definitions for a given function.
        It first searches for the input/outputs in the CREATE statement. If not found, it resolves them using decorators. If not found there as well, it raises an error.

        Args:
            function (FunctionCatalogEntry): The function for which to resolve input and output definitions.
                Some handlers pass None here.

        Returns:
            A List of FunctionIOCatalogEntry objects that represent the resolved input and
            output definitions for the function (inputs first, then outputs).

        Raises:
            RuntimeError: If an error occurs while resolving the function input/output
            definitions.
        """
        io_list = []
        try:
            # Definitions from the CREATE statement take precedence over the
            # ones declared through decorators.
            if self.node.inputs:
                io_list.extend(self.node.inputs)
            else:
                io_list.extend(
                    load_io_from_function_decorators(function, is_input=True)
                )

            if self.node.outputs:
                io_list.extend(self.node.outputs)
            else:
                io_list.extend(
                    load_io_from_function_decorators(function, is_input=False)
                )

        except FunctionIODefinitionError as e:
            err_msg = (
                f"Error creating function, input/output definition incorrect: {str(e)}"
            )
            logger.error(err_msg)
            raise RuntimeError(err_msg) from e

        return io_list

1	# coding=utf-8
2	# Copyright 2018-2023 EvaDB
3	#
4	# Licensed under the Apache License, Version 2.0 (the "License");
5	# you may not use this file except in compliance with the License.
6	# You may obtain a copy of the License at
7	#
8	# http://www.apache.org/licenses/LICENSE-2.0
9	#
10	# Unless required by applicable law or agreed to in writing, software
11	# distributed under the License is distributed on an "AS IS" BASIS,
12	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	# See the License for the specific language governing permissions and
14	# limitations under the License.
15	import hashlib	1✔
16	import os	1✔
17	import pickle	1✔
18	from pathlib import Path	1✔
19	from typing import Dict, List	1✔
20
21	import pandas as pd	1✔
22
23	from evadb.catalog.catalog_utils import get_metadata_properties	1✔
24	from evadb.catalog.models.function_catalog import FunctionCatalogEntry	1✔
25	from evadb.catalog.models.function_io_catalog import FunctionIOCatalogEntry	1✔
26	from evadb.catalog.models.function_metadata_catalog import FunctionMetadataCatalogEntry	1✔
27	from evadb.configuration.constants import (	1✔
28	DEFAULT_TRAIN_TIME_LIMIT,
29	EvaDB_INSTALLATION_DIR,
30	)
31	from evadb.database import EvaDBDatabase	1✔
32	from evadb.executor.abstract_executor import AbstractExecutor	1✔
33	from evadb.functions.decorators.utils import load_io_from_function_decorators	1✔
34	from evadb.models.storage.batch import Batch	1✔
35	from evadb.plan_nodes.create_function_plan import CreateFunctionPlan	1✔
36	from evadb.third_party.huggingface.create import gen_hf_io_catalog_entries	1✔
37	from evadb.utils.errors import FunctionIODefinitionError	1✔
38	from evadb.utils.generic_utils import (	1✔
39	load_function_class_from_file,
40	string_comparison_case_insensitive,
41	try_to_import_forecast,
42	try_to_import_ludwig,
43	try_to_import_torch,
44	try_to_import_ultralytics,
45	)
46	from evadb.utils.logging_manager import logger	1✔
47
48
49	class CreateFunctionExecutor(AbstractExecutor):	1✔
50	def __init__(self, db: EvaDBDatabase, node: CreateFunctionPlan):	1✔
51	super().__init__(db, node)	1✔
52	self.function_dir = Path(EvaDB_INSTALLATION_DIR) / "functions"	1✔
53
54	def handle_huggingface_function(self):	1✔
55	"""Handle HuggingFace functions
56
57	HuggingFace functions are special functions that are not loaded from a file.
58	So we do not need to call the setup method on them like we do for other functions.
59	"""
60	# We need at least one deep learning framework for HuggingFace
61	# Torch or Tensorflow
62	try_to_import_torch()	×
63	impl_path = f"{self.function_dir}/abstract/hf_abstract_function.py"	×
64	io_list = gen_hf_io_catalog_entries(self.node.name, self.node.metadata)	×
65	return (	×
66	self.node.name,
67	impl_path,
68	self.node.function_type,
69	io_list,
70	self.node.metadata,
71	)
72
73	def handle_ludwig_function(self):	1✔
74	"""Handle ludwig functions
75
76	Use Ludwig's auto_train engine to train/tune models.
77	"""
78	try_to_import_ludwig()	×
79	from ludwig.automl import auto_train	×
80
81	assert (	×
82	len(self.children) == 1
83	), "Create ludwig function expects 1 child, finds {}.".format(
84	len(self.children)
85	)
86
87	aggregated_batch_list = []	×
88	child = self.children[0]	×
89	for batch in child.exec():	×
90	aggregated_batch_list.append(batch)	×
91	aggregated_batch = Batch.concat(aggregated_batch_list, copy=False)	×
92	aggregated_batch.drop_column_alias()	×
93
94	arg_map = {arg.key: arg.value for arg in self.node.metadata}	×
95	auto_train_results = auto_train(	×
96	dataset=aggregated_batch.frames,
97	target=arg_map["predict"],
98	tune_for_memory=arg_map.get("tune_for_memory", False),
99	time_limit_s=arg_map.get("time_limit", DEFAULT_TRAIN_TIME_LIMIT),
100	output_directory=self.db.config.get_value("storage", "tmp_dir"),
101	)
102	model_path = os.path.join(	×
103	self.db.config.get_value("storage", "model_dir"), self.node.name
104	)
105	auto_train_results.best_model.save(model_path)	×
106	self.node.metadata.append(	×
107	FunctionMetadataCatalogEntry("model_path", model_path)
108	)
109
110	impl_path = Path(f"{self.function_dir}/ludwig.py").absolute().as_posix()	×
111	io_list = self._resolve_function_io(None)	×
112	return (	×
113	self.node.name,
114	impl_path,
115	self.node.function_type,
116	io_list,
117	self.node.metadata,
118	)
119
120	def handle_ultralytics_function(self):	1✔
121	"""Handle Ultralytics functions"""
122	try_to_import_ultralytics()	1✔
123
124	impl_path = (	1✔
125	Path(f"{self.function_dir}/yolo_object_detector.py").absolute().as_posix()
126	)
127	function = self._try_initializing_function(	1✔
128	impl_path, function_args=get_metadata_properties(self.node)
129	)
130	io_list = self._resolve_function_io(function)	1✔
131	return (	1✔
132	self.node.name,
133	impl_path,
134	self.node.function_type,
135	io_list,
136	self.node.metadata,
137	)
138
139	def handle_forecasting_function(self):	1✔
140	"""Handle forecasting functions"""
141	aggregated_batch_list = []	×
142	child = self.children[0]	×
143	for batch in child.exec():	×
144	aggregated_batch_list.append(batch)	×
145	aggregated_batch = Batch.concat(aggregated_batch_list, copy=False)	×
146	aggregated_batch.drop_column_alias()	×
147
148	arg_map = {arg.key: arg.value for arg in self.node.metadata}	×
149	if not self.node.impl_path:	×
150	impl_path = Path(f"{self.function_dir}/forecast.py").absolute().as_posix()	×
151	else:
152	impl_path = self.node.impl_path.absolute().as_posix()	×
153
154	if "model" not in arg_map.keys():	×
155	arg_map["model"] = "AutoARIMA"	×
156
157	model_name = arg_map["model"]	×
158
159	"""	×
160	The following rename is needed for statsforecast, which requires the column name to be the following:
161	- The unique_id (string, int or category) represents an identifier for the series.
162	- The ds (datestamp) column should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp.
163	- The y (numeric) represents the measurement we wish to forecast.
164	For reference: https://nixtla.github.io/statsforecast/docs/getting-started/getting_started_short.html
165	"""
166	aggregated_batch.rename(columns={arg_map["predict"]: "y"})	×
167	if "time" in arg_map.keys():	×
168	aggregated_batch.rename(columns={arg_map["time"]: "ds"})	×
169	if "id" in arg_map.keys():	×
170	aggregated_batch.rename(columns={arg_map["id"]: "unique_id"})	×
171
172	data = aggregated_batch.frames	×
173	if "unique_id" not in list(data.columns):	×
174	data["unique_id"] = [1 for x in range(len(data))]	×
175
176	if "ds" not in list(data.columns):	×
177	data["ds"] = [x + 1 for x in range(len(data))]	×
178
179	if "frequency" not in arg_map.keys():	×
180	arg_map["frequency"] = pd.infer_freq(data["ds"])	×
181	frequency = arg_map["frequency"]	×
182	if frequency is None:	×
183	raise RuntimeError(
184	f"Can not infer the frequency for {self.node.name}. Please explictly set it."
185	)
186
187	try_to_import_forecast()	×
188	from statsforecast import StatsForecast	×
189	from statsforecast.models import AutoARIMA, AutoCES, AutoETS, AutoTheta	×
190
191	model_dict = {	×
192	"AutoARIMA": AutoARIMA,
193	"AutoCES": AutoCES,
194	"AutoETS": AutoETS,
195	"AutoTheta": AutoTheta,
196	}
197
198	season_dict = { # https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases	×
199	"H": 24,
200	"M": 12,
201	"Q": 4,
202	"SM": 24,
203	"BM": 12,
204	"BMS": 12,
205	"BQ": 4,
206	"BH": 24,
207	}
208
209	new_freq = (	×
210	frequency.split("-")[0] if "-" in frequency else frequency
211	) # shortens longer frequencies like Q-DEC
212	season_length = season_dict[new_freq] if new_freq in season_dict else 1	×
213	model = StatsForecast(	×
214	[model_dict[model_name](season_length=season_length)], freq=new_freq
215	)
216
217	model_dir = os.path.join(	×
218	self.db.config.get_value("storage", "model_dir"), self.node.name
219	)
220	Path(model_dir).mkdir(parents=True, exist_ok=True)	×
221	model_path = os.path.join(	×
222	self.db.config.get_value("storage", "model_dir"),
223	self.node.name,
224	str(hashlib.sha256(data.to_string().encode()).hexdigest()) + ".pkl",
225	)
226
227	weight_file = Path(model_path)	×
228	data["ds"] = pd.to_datetime(data["ds"])	×
229	if not weight_file.exists():	×
230	model.fit(data)	×
231	f = open(model_path, "wb")	×
232	pickle.dump(model, f)	×
233	f.close()	×
234
235	io_list = self._resolve_function_io(None)	×
236
237	metadata_here = [	×
238	FunctionMetadataCatalogEntry("model_name", model_name),
239	FunctionMetadataCatalogEntry("model_path", model_path),
240	FunctionMetadataCatalogEntry(
241	"predict_column_rename", arg_map.get("predict", "y")
242	),
243	FunctionMetadataCatalogEntry(
244	"time_column_rename", arg_map.get("time", "ds")
245	),
246	FunctionMetadataCatalogEntry(
247	"id_column_rename", arg_map.get("id", "unique_id")
248	),
249	]
250
251	return (	×
252	self.node.name,
253	impl_path,
254	self.node.function_type,
255	io_list,
256	metadata_here,
257	)
258
259	def handle_generic_function(self):	1✔
260	"""Handle generic functions
261
262	Generic functions are loaded from a file. We check for inputs passed by the user during CREATE or try to load io from decorators.
263	"""
264	impl_path = self.node.impl_path.absolute().as_posix()	1✔
265	function = self._try_initializing_function(impl_path)	1✔
266	io_list = self._resolve_function_io(function)	1✔
267
268	return (	1✔
269	self.node.name,
270	impl_path,
271	self.node.function_type,
272	io_list,
273	self.node.metadata,
274	)
275
276	def exec(self, *args, **kwargs):	1✔
277	"""Create function executor
278
279	Calls the catalog to insert a function catalog entry.
280	"""
281	# check catalog if it already has this function entry
282	if self.catalog().get_function_catalog_entry_by_name(self.node.name):	1✔
283	if self.node.if_not_exists:	×
284	msg = f"Function {self.node.name} already exists, nothing added."	×
285	yield Batch(pd.DataFrame([msg]))	×
286	return	×
287	else:
288	msg = f"Function {self.node.name} already exists."	×
289	logger.error(msg)	×
290	raise RuntimeError(msg)
291
292	# if it's a type of HuggingFaceModel, override the impl_path
293	if string_comparison_case_insensitive(self.node.function_type, "HuggingFace"):	1✔
294	(	×
295	name,
296	impl_path,
297	function_type,
298	io_list,
299	metadata,
300	) = self.handle_huggingface_function()
301	elif string_comparison_case_insensitive(self.node.function_type, "ultralytics"):	1✔
302	(	1✔
303	name,
304	impl_path,
305	function_type,
306	io_list,
307	metadata,
308	) = self.handle_ultralytics_function()
309	elif string_comparison_case_insensitive(self.node.function_type, "Ludwig"):	1✔
310	(	×
311	name,
312	impl_path,
313	function_type,
314	io_list,
315	metadata,
316	) = self.handle_ludwig_function()
317	elif string_comparison_case_insensitive(self.node.function_type, "Forecasting"):	1✔
318	(	×
319	name,
320	impl_path,
321	function_type,
322	io_list,
323	metadata,
324	) = self.handle_forecasting_function()
325	else:
326	(	1✔
327	name,
328	impl_path,
329	function_type,
330	io_list,
331	metadata,
332	) = self.handle_generic_function()
333
334	self.catalog().insert_function_catalog_entry(	1✔
335	name, impl_path, function_type, io_list, metadata
336	)
337	yield Batch(	1✔
338	pd.DataFrame(
339	[f"Function {self.node.name} successfully added to the database."]
340	)
341	)
342
343	def _try_initializing_function(	1✔
344	self, impl_path: str, function_args: Dict = {}
345	) -> FunctionCatalogEntry:
346	"""Attempts to initialize function given the implementation file path and arguments.
347
348	Args:
349	impl_path (str): The file path of the function implementation file.
350	function_args (Dict, optional): Dictionary of arguments to pass to the function. Defaults to {}.
351
352	Returns:
353	FunctionCatalogEntry: A FunctionCatalogEntry object that represents the initialized function.
354
355	Raises:
356	RuntimeError: If an error occurs while initializing the function.
357	"""
358
359	# load the function class from the file
360	try:	1✔
361	# loading the function class from the file
362	function = load_function_class_from_file(impl_path, self.node.name)	1✔
363	# initializing the function class calls the setup method internally
364	function(**function_args)	1✔
365	except Exception as e:
366	err_msg = f"Error creating function: {str(e)}"
367	# logger.error(err_msg)
368	raise RuntimeError(err_msg)
369
370	return function	1✔
371
372	def _resolve_function_io(	1✔
373	self, function: FunctionCatalogEntry
374	) -> List[FunctionIOCatalogEntry]:
375	"""Private method that resolves the input/output definitions for a given function.
376	It first searches for the input/outputs in the CREATE statement. If not found, it resolves them using decorators. If not found there as well, it raises an error.
377
378	Args:
379	function (FunctionCatalogEntry): The function for which to resolve input and output definitions.
380
381	Returns:
382	A List of FunctionIOCatalogEntry objects that represent the resolved input and
383	output definitions for the function.
384
385	Raises:
386	RuntimeError: If an error occurs while resolving the function input/output
387	definitions.
388	"""
389	io_list = []	1✔
390	try:	1✔
391	if self.node.inputs:	1✔
392	io_list.extend(self.node.inputs)	1✔
393	else:
394	# try to load the inputs from decorators, the inputs from CREATE statement take precedence
395	io_list.extend(	1✔
396	load_io_from_function_decorators(function, is_input=True)
397	)
398
399	if self.node.outputs:	1✔
400	io_list.extend(self.node.outputs)	1✔
401	else:
402	# try to load the outputs from decorators, the outputs from CREATE statement take precedence
403	io_list.extend(	1✔
404	load_io_from_function_decorators(function, is_input=False)
405	)
406
407	except FunctionIODefinitionError as e:
408	err_msg = (
409	f"Error creating function, input/output definition incorrect: {str(e)}"
410	)
411	logger.error(err_msg)
412	raise RuntimeError(err_msg)
413
414	return io_list	1✔

georgia-tech-db / eva / b4a3b4d1-cb33-4fa9-8d9c-0174de8abd24

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous