• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

georgia-tech-db / eva / #758

04 Sep 2023 08:37PM UTC coverage: 0.0% (-78.3%) from 78.333%
#758

push

circle-ci

hershd23
Increased underline length at line 75 in text_summarization.rst
	modified:   docs/source/benchmarks/text_summarization.rst

0 of 11303 relevant lines covered (0.0%)

0.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/evadb/third_party/huggingface/create.py
1
# coding=utf-8
2
# Copyright 2018-2023 EvaDB
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
from typing import Dict, List, Type, Union
×
16

17
import numpy as np
×
18

19
from evadb.catalog.catalog_type import ColumnType, NdArrayType
×
20
from evadb.catalog.models.udf_io_catalog import UdfIOCatalogEntry
×
21
from evadb.catalog.models.udf_metadata_catalog import UdfMetadataCatalogEntry
×
22
from evadb.third_party.huggingface.model import (
×
23
    ASRHFModel,
24
    AudioHFModel,
25
    HFInputTypes,
26
    ImageHFModel,
27
    TextHFModel,
28
)
29
from evadb.utils.generic_utils import try_to_import_transformers
×
30

31
"""
We currently support the following tasks from HuggingFace.
Each task is mapped to the type of input it expects.
"""
# Maps a HuggingFace pipeline task name to the HFInputTypes member that
# gen_sample_input() uses to fabricate a probe input for that task.
INPUT_TYPE_FOR_SUPPORTED_TASKS = {
    # Audio tasks
    "audio-classification": HFInputTypes.AUDIO,
    "automatic-speech-recognition": HFInputTypes.AUDIO,
    # Text tasks
    "text-classification": HFInputTypes.TEXT,
    "summarization": HFInputTypes.TEXT,
    "translation": HFInputTypes.TEXT,
    "text2text-generation": HFInputTypes.TEXT,
    "text-generation": HFInputTypes.TEXT,
    "ner": HFInputTypes.TEXT,
    # Image tasks
    "image-classification": HFInputTypes.IMAGE,
    "image-segmentation": HFInputTypes.IMAGE,
    "image-to-text": HFInputTypes.IMAGE,
    "object-detection": HFInputTypes.IMAGE,
    "depth-estimation": HFInputTypes.IMAGE,
}
50

51
# Maps each supported task to the EvaDB HF model wrapper class that knows how
# to feed it the corresponding input modality. Keys mirror
# INPUT_TYPE_FOR_SUPPORTED_TASKS.
MODEL_FOR_TASK = {
    # Audio tasks
    "audio-classification": AudioHFModel,
    "automatic-speech-recognition": ASRHFModel,
    # Text tasks
    "text-classification": TextHFModel,
    "summarization": TextHFModel,
    "translation": TextHFModel,
    "text2text-generation": TextHFModel,
    "text-generation": TextHFModel,
    "ner": TextHFModel,
    # Image tasks
    "image-classification": ImageHFModel,
    "image-segmentation": ImageHFModel,
    "image-to-text": ImageHFModel,
    "object-detection": ImageHFModel,
    "depth-estimation": ImageHFModel,
}
66

67

68
def sample_text():
    """Return a fixed English sentence used as a dummy text input."""
    dummy_sentence = "My name is Sarah and I live in London"
    return dummy_sentence
70

71

72
def sample_image():
    """Return a 224x224 white RGB image with a centered yellow circle.

    Used as a dummy input to probe image-based HuggingFace pipelines.
    """
    from PIL import Image, ImageDraw

    width, height = 224, 224
    canvas = Image.new("RGB", (width, height), "white")
    painter = ImageDraw.Draw(canvas)

    # The circle's diameter spans half of the smaller image dimension.
    radius = min(width, height) // 4
    center_x, center_y = width // 2, height // 2
    bounding_box = (
        center_x - radius,
        center_y - radius,
        center_x + radius,
        center_y + radius,
    )
    painter.ellipse(bounding_box, fill="yellow")
    return canvas
89

90

91
def sample_audio():
    """Return one second of random audio samples at a 16 kHz sample rate.

    Values are uniform floats in [0, 1); used as a dummy input to probe
    audio-based HuggingFace pipelines.
    """
    duration_ms = 1000
    sample_rate_hz = 16000
    total_samples = duration_ms * sample_rate_hz // 1000
    return np.random.rand(total_samples)
96

97

98
def gen_sample_input(input_type: HFInputTypes):
    """Generate a dummy model input for the given HuggingFace input type.

    Args:
        input_type: one of HFInputTypes.TEXT / IMAGE / AUDIO.

    Returns:
        A sample text string, PIL image, or numpy audio array, matching
        what the corresponding pipeline expects.

    Raises:
        ValueError: if `input_type` is not a supported HFInputTypes member.
    """
    if input_type == HFInputTypes.TEXT:
        return sample_text()
    elif input_type == HFInputTypes.IMAGE:
        return sample_image()
    elif input_type == HFInputTypes.AUDIO:
        return sample_audio()
    # `assert False` is stripped under `python -O`, which would let an
    # unsupported type fall through and return None; raise explicitly.
    raise ValueError(f"Invalid Input Type {input_type} for UDF")
106

107

108
def infer_output_name_and_type(**pipeline_args):
    """
    Infer the name and type for each output of the HuggingFace UDF

    Constructs the pipeline from `pipeline_args`, runs it on a generated
    sample input, and maps each key of the sample output to the python
    type of its value.
    """
    assert "task" in pipeline_args, "Task Not Found In Model Definition"
    task = pipeline_args["task"]
    assert (
        task in INPUT_TYPE_FOR_SUPPORTED_TASKS
    ), f"Task {task} not supported in EvaDB currently"

    # Construct the pipeline; transformers is imported lazily since it is
    # an optional dependency.
    try_to_import_transformers()
    from transformers import pipeline

    pipe = pipeline(**pipeline_args)

    # Probe the pipeline with a dummy input to obtain a sample output.
    input_type = INPUT_TYPE_FOR_SUPPORTED_TASKS[task]
    model_output = pipe(gen_sample_input(input_type))

    # Pipelines may return either a list of dicts or a single dict;
    # a single element is representative of the output schema.
    sample_out = model_output[0] if isinstance(model_output, list) else model_output

    # Map every output field name to the python type of its value.
    output_types = {key: type(value) for key, value in sample_out.items()}

    return input_type, output_types
140

141

142
def io_entry_for_inputs(udf_name: str, udf_input: Union[str, List]):
    """
    Generates the IO Catalog Entry for the inputs to HF UDFs
    Input is one of ["text", "image", "audio", "video", "multimodal"]

    Args:
        udf_name: name of the UDF; used to prefix each input column name.
        udf_input: a single HFInputTypes value or a list of them.

    Returns:
        A list of UdfIOCatalogEntry objects, one per input type.
    """
    if isinstance(udf_input, HFInputTypes):
        udf_input = [udf_input]
    inputs = []
    for input_type in udf_input:
        array_type = NdArrayType.ANYTYPE
        if input_type == HFInputTypes.TEXT:
            array_type = NdArrayType.STR
        # BUG FIX: the audio branch previously compared the whole
        # `udf_input` list (`udf_input == HFInputTypes.AUDIO`), which is
        # always False at this point, so AUDIO inputs incorrectly fell
        # through to ANYTYPE. Compare the loop variable instead.
        elif input_type in (HFInputTypes.IMAGE, HFInputTypes.AUDIO):
            array_type = NdArrayType.FLOAT32
        inputs.append(
            UdfIOCatalogEntry(
                name=f"{udf_name}_{input_type}",
                type=ColumnType.NDARRAY,
                is_nullable=False,
                array_type=array_type,
                is_input=True,
            )
        )
    return inputs
166

167

168
def ptype_to_ndarray_type(col_type: type):
    """
    Helper function that maps python types to ndarray types
    """
    # Types without an explicit mapping fall back to ANYTYPE.
    python_to_ndarray = {
        str: NdArrayType.STR,
        float: NdArrayType.FLOAT32,
    }
    return python_to_ndarray.get(col_type, NdArrayType.ANYTYPE)
178

179

180
def io_entry_for_outputs(udf_outputs: Dict[str, Type]):
    """
    Generates the IO Catalog Entry for the output

    Each (column name, python type) pair becomes one UdfIOCatalogEntry
    whose array type is derived via ptype_to_ndarray_type.
    """
    return [
        UdfIOCatalogEntry(
            name=col_name,
            type=ColumnType.NDARRAY,
            array_type=ptype_to_ndarray_type(col_type),
            is_input=False,
        )
        for col_name, col_type in udf_outputs.items()
    ]
195

196

197
def gen_hf_io_catalog_entries(udf_name: str, metadata: List[UdfMetadataCatalogEntry]):
    """
    Generates IO Catalog Entries for a HuggingFace UDF.
    The attributes of the huggingface model can be extracted from metadata.
    """
    # Metadata rows are key/value pairs forming the pipeline arguments.
    pipeline_args = {entry.key: entry.value for entry in metadata}
    udf_input, udf_output = infer_output_name_and_type(**pipeline_args)
    annotated_inputs = io_entry_for_inputs(udf_name, udf_input)
    annotated_outputs = io_entry_for_outputs(udf_output)
    return annotated_inputs + annotated_outputs
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc