a4c010ba-78be-4818-8e6f-1da08c6af280

Committed 31 Aug 2023 11:59PM UTC coverage: 70.992% (-10.6%) from 81.552%

Build # a4c010ba-78be-4818-8e6f-1da08c6af280

Build Type

push

circle-ci

Committed by

web-flow

Commit Message

Merge branch 'staging' into evadb_staging

Run Details

54 of 54 new or added lines in 3 files covered. (100.0%)

8020 of 11297 relevant lines covered (70.99%)

0.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

50.0

/evadb/storage/document_storage_engine.py

# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
from typing import Iterator

from evadb.catalog.models.table_catalog import TableCatalogEntry
from evadb.database import EvaDBDatabase
from evadb.models.storage.batch import Batch
from evadb.readers.document.document_reader import DocumentReader
from evadb.storage.abstract_media_storage_engine import AbstractMediaStorageEngine


class DocumentStorageEngine(AbstractMediaStorageEngine):
    """Media storage engine that reads the documents registered in a table."""

    def __init__(self, db: EvaDBDatabase):
        """Initialise the engine by delegating to the parent constructor."""
        super().__init__(db)

    def read(self, table: TableCatalogEntry, chunk_params: dict) -> Iterator[Batch]:
        """Lazily yield the batches read from every document of ``table``.

        Each row of the table's metadata supplies a ``(row_id, file_name)``
        pair. The file is resolved under ``table.file_url`` and handed to a
        ``DocumentReader``; every batch the reader produces is tagged with
        the row id and the file name before being yielded.

        Args:
            table: catalog entry of the document table to read.
            chunk_params: forwarded unchanged to ``DocumentReader``.

        Yields:
            Batches whose frames carry the row id under the table's first
            column name and the file name (as ``str``) under the second.
        """
        metadata_table = self._get_metadata_table(table)
        # NOTE(review): 12 is presumably the metadata read batch size -
        # confirm against the rdb handler's ``read`` signature.
        for metadata_rows in self._rdb_handler.read(metadata_table, 12):
            for _, record in metadata_rows.iterrows():
                row_id, file_name = record
                local_name = self._xform_file_url_to_file_name(file_name)
                document_path = Path(table.file_url) / local_name
                # TODO: batch_mem_size is pinned to 1 for now; this still
                # needs a proper fix.
                document_reader = DocumentReader(
                    str(document_path),
                    batch_mem_size=1,
                    chunk_params=chunk_params,
                )
                for chunk_batch in document_reader.read():
                    frames = chunk_batch.frames
                    frames[table.columns[0].name] = row_id
                    frames[table.columns[1].name] = str(file_name)
                    yield chunk_batch

1	# coding=utf-8
2	# Copyright 2018-2023 EvaDB
3	#
4	# Licensed under the Apache License, Version 2.0 (the "License");
5	# you may not use this file except in compliance with the License.
6	# You may obtain a copy of the License at
7	#
8	# http://www.apache.org/licenses/LICENSE-2.0
9	#
10	# Unless required by applicable law or agreed to in writing, software
11	# distributed under the License is distributed on an "AS IS" BASIS,
12	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	# See the License for the specific language governing permissions and
14	# limitations under the License.
15	from pathlib import Path	1✔
16	from typing import Iterator	1✔
17
18	from evadb.catalog.models.table_catalog import TableCatalogEntry	1✔
19	from evadb.database import EvaDBDatabase	1✔
20	from evadb.models.storage.batch import Batch	1✔
21	from evadb.readers.document.document_reader import DocumentReader	1✔
22	from evadb.storage.abstract_media_storage_engine import AbstractMediaStorageEngine	1✔
23
24
25	class DocumentStorageEngine(AbstractMediaStorageEngine):	1✔
26	def __init__(self, db: EvaDBDatabase):	1✔
27	super().__init__(db)	×
28
29	def read(self, table: TableCatalogEntry, chunk_params: dict) -> Iterator[Batch]:	1✔
30	for doc_files in self._rdb_handler.read(self._get_metadata_table(table), 12):	×
31	for _, (row_id, file_name) in doc_files.iterrows():	×
32	system_file_name = self._xform_file_url_to_file_name(file_name)	×
33	doc_file = Path(table.file_url) / system_file_name	×
34	# setting batch_mem_size = 1, we need fix it
35	reader = DocumentReader(	×
36	str(doc_file), batch_mem_size=1, chunk_params=chunk_params
37	)
38	for batch in reader.read():	×
39	batch.frames[table.columns[0].name] = row_id	×
40	batch.frames[table.columns[1].name] = str(file_name)	×
41	yield batch	×

georgia-tech-db / eva / a4c010ba-78be-4818-8e6f-1da08c6af280

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous