• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

georgia-tech-db / eva / #758

04 Sep 2023 08:37PM UTC coverage: 0.0% (-78.3%) from 78.333%
#758

push

circle-ci

hershd23
Increased underline length at line 75 in text_summarization.rst
	modified:   docs/source/benchmarks/text_summarization.rst

0 of 11303 relevant lines covered (0.0%)

0.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/evadb/storage/abstract_media_storage_engine.py
1
# coding=utf-8
2
# Copyright 2018-2023 EvaDB
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
import os
×
16
import re
×
17
import shutil
×
18
from pathlib import Path
×
19

20
import pandas as pd
×
21

22
from evadb.catalog.models.table_catalog import TableCatalogEntry
×
23
from evadb.database import EvaDBDatabase
×
24
from evadb.models.storage.batch import Batch
×
25
from evadb.parser.table_ref import TableInfo
×
26
from evadb.storage.abstract_storage_engine import AbstractStorageEngine
×
27
from evadb.storage.sqlite_storage_engine import SQLStorageEngine
×
28
from evadb.utils.logging_manager import logger
×
29

30

31
class AbstractMediaStorageEngine(AbstractStorageEngine):
    """Base storage engine for media tables.

    A media table is stored as a directory (``table.file_url``) holding one
    symbolic link per media file, plus a metadata table, managed through a
    ``SQLStorageEngine``, that records the file urls written to the table.
    """

    def __init__(self, db: EvaDBDatabase):
        """Initialize the engine and the SQL engine used for the metadata table."""
        super().__init__(db)
        self._rdb_handler: SQLStorageEngine = SQLStorageEngine(db)

    def _get_metadata_table(self, table: TableCatalogEntry):
        """Return the catalog entry of the metadata table that belongs to `table`."""
        return self.db.catalog().get_multimedia_metadata_table_catalog_entry(table)

    def _create_metadata_table(self, table: TableCatalogEntry):
        """Create the metadata table catalog entry for `table` and return it."""
        catalog = self.db.catalog()
        return catalog.create_and_insert_multimedia_metadata_table_catalog_entry(
            table
        )

    def _xform_file_url_to_file_name(self, file_url: Path) -> str:
        """Flatten a media path into a single file name.

        Files are stored as symbolic links inside one directory, so two media
        files with the same base name but different directories (e.g.
        ``a/b/my.mp4`` and ``a/b/c/my.mp4``) must map to different names. The
        directory components are therefore folded into the file name: every
        character other than an ASCII letter, digit, space, dot or newline is
        replaced by an underscore. This replaced an earlier hashing-based
        scheme and avoids its computation overhead.

        NOTE: the mapping is not injective; e.g. ``a/b_c.mp4`` and
        ``a_b/c.mp4`` both become ``a_b_c.mp4``.

        Args:
            file_url: path of the media file.

        Returns:
            The flattened file name.
        """
        return re.sub(r"[^a-zA-Z0-9 \.\n]", "_", str(file_url))

    def create(self, table: TableCatalogEntry, if_not_exists: bool = True) -> bool:
        """Create the storage for a media table.

        Creates the directory that stores the media files and a table that
        persists the file urls.

        Args:
            table: catalog entry of the table; ``table.file_url`` is the
                directory to create.
            if_not_exists: when True, an already existing directory is not an
                error and the method returns immediately.

        Returns:
            True on success, or when the directory already exists and
            `if_not_exists` is True.

        Raises:
            FileExistsError: the directory already exists and `if_not_exists`
                is False.
        """
        dir_path = Path(table.file_url)
        try:
            dir_path.mkdir(parents=True)
        except FileExistsError as e:
            if if_not_exists:
                return True
            # Build the message on one logical line: a backslash continuation
            # inside the literal would embed the source indentation in it.
            error = (
                f"Failed to load the image as directory already exists: {dir_path}"
            )
            logger.error(error)
            raise FileExistsError(error) from e

        self._rdb_handler.create(self._create_metadata_table(table))
        return True

    def drop(self, table: TableCatalogEntry) -> None:
        """Remove the table directory, its metadata table and its catalog entry.

        Raises:
            Exception: any step failed; the original error is attached as the
                cause.
        """
        try:
            dir_path = Path(table.file_url)
            shutil.rmtree(str(dir_path))
            metadata_table = self._get_metadata_table(table)
            self._rdb_handler.drop(metadata_table)
            # remove the metadata table from the catalog
            self.db.catalog().delete_table_catalog_entry(metadata_table)
        except Exception as e:
            err_msg = f"Failed to drop the image table {e}"
            logger.exception(err_msg)
            raise Exception(err_msg) from e

    def delete(self, table: TableCatalogEntry, rows: Batch) -> bool:
        """Delete the media files listed in `rows` from the table.

        For every path in ``rows.file_paths()`` the matching metadata row is
        deleted first, then the corresponding file in the table directory is
        unlinked.

        Returns:
            True when every file was deleted.

        Raises:
            RuntimeError: any deletion step failed; the original error is
                attached as the cause.
        """
        # Bound before the try block so the error handler can always format
        # it, even when the failure happens before the first loop iteration.
        media_file_path = None
        try:
            media_metadata_table = self._get_metadata_table(table)
            table_dir = Path(table.file_url)
            for media_file_path in rows.file_paths():
                dst_file_name = self._xform_file_url_to_file_name(Path(media_file_path))
                image_file = table_dir / dst_file_name
                self._rdb_handler.delete(
                    media_metadata_table,
                    where_clause={
                        media_metadata_table.identifier_column: str(media_file_path)
                    },
                )
                image_file.unlink()
        except Exception as e:
            error = f"Deleting file path {media_file_path} failed with exception {e}"
            logger.exception(error)
            raise RuntimeError(error) from e
        return True

    def write(self, table: TableCatalogEntry, rows: Batch) -> bool:
        """Add the media files listed in `rows` to the table.

        Each file is linked into the table directory through a symbolic link
        and the file urls are then written to the metadata table. If anything
        fails, the links created by this call are removed again.

        Returns:
            True when all files were linked and recorded.

        Raises:
            RuntimeError: a file already exists in the table, or linking or
                the metadata write failed; the original error is attached as
                the cause.
        """
        # Bound before the try block so the rollback below never hits an
        # unbound name, whatever statement fails first.
        copied_files = []
        try:
            dir_path = Path(table.file_url)
            for media_file_path in rows.file_paths():
                media_file = Path(media_file_path)
                dst_file_name = self._xform_file_url_to_file_name(media_file)
                dst_path = dir_path / dst_file_name
                if dst_path.exists():
                    raise FileExistsError(
                        f"Duplicate File: {media_file} already exists in the table {table.name}"
                    )
                # Relative paths are resolved against the current working
                # directory; an absolute media_file is used unchanged.
                src_path = Path.cwd() / media_file
                os.symlink(src_path, dst_path)
                copied_files.append(dst_path)
            # assuming sql write is an atomic operation
            self._rdb_handler.write(
                self._get_metadata_table(table),
                Batch(pd.DataFrame({"file_url": list(rows.file_paths())})),
            )

        except Exception as e:
            # Roll back the links created so far. A failing unlink must not
            # stop the rollback of the remaining links or hide the original
            # error.
            for file in copied_files:
                logger.info(f"Rollback file {file}")
                try:
                    file.unlink()
                except OSError:
                    logger.exception(f"Rollback file {file}")
            logger.exception(str(e))
            raise RuntimeError(str(e)) from e
        else:
            return True

    def rename(self, old_table: TableCatalogEntry, new_name: TableInfo) -> None:
        """Rename the table's catalog entry to `new_name`.

        Raises:
            Exception: the catalog rename failed; the original error is
                attached as the cause.
        """
        try:
            self.db.catalog().rename_table_catalog_entry(old_table, new_name)
        except Exception as e:
            raise Exception(
                f"Failed to rename table {new_name} with exception {e}"
            ) from e
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc