• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IBM / unitxt / 14706437845

28 Apr 2025 11:16AM UTC coverage: 80.149% (+0.1%) from 80.035%
14706437845

Pull #1764

github

web-flow
Merge 1ce583a5e into 29ef085a0
Pull Request #1764: Add tool calling support + Berkeley Tool Calling Benchmark (simple-v3)

1643 of 2034 branches covered (80.78%)

Branch coverage included in aggregate %.

10268 of 12827 relevant lines covered (80.05%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.92
src/unitxt/serializers.py
1
import csv
1✔
2
import io
1✔
3
import json
1✔
4
from abc import abstractmethod
1✔
5
from typing import Any, Dict, List, Union
1✔
6

7
from .dataclass import AbstractField, Field
1✔
8
from .operators import InstanceFieldOperator
1✔
9
from .settings_utils import get_constants
1✔
10
from .tool_calling import convert_to_chat_api_format
1✔
11
from .type_utils import isoftype, to_type_string
1✔
12
from .types import (
1✔
13
    Dialog,
14
    Document,
15
    Image,
16
    MultiDocument,
17
    Number,
18
    SQLDatabase,
19
    Table,
20
    Tool,
21
    ToolCall,
22
    Video,
23
)
24

25
# Constants fetched once when the module is imported; the image serializer
# below reads ``constants.image_tag`` from it.
constants = get_constants()
1✔
26

27

28
class Serializer(InstanceFieldOperator):
    """Base operator whose per-value processing is delegated to ``serialize``.

    Subclasses implement ``serialize``; ``process_instance_value`` only
    forwards its arguments to it and returns the result.
    """

    def process_instance_value(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return ``self.serialize(value, instance)``."""
        serialized = self.serialize(value, instance)
        return serialized

    @abstractmethod
    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Convert ``value`` to its serialized form.

        Args:
            value: The field value to serialize.
            instance: The instance dictionary the value was taken from.
        """
35

36

37
class DefaultSerializer(Serializer):
    """Serializer that falls back on the value's own ``str`` conversion."""

    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return ``str(value)``; ``instance`` is not used."""
        return f"{value!s}"
40

41

42
class SingleTypeSerializer(InstanceFieldOperator):
    """Operator that serializes values of one declared type only.

    ``process_instance_value`` calls ``self.serialize``, which this class does
    not define, so concrete subclasses are expected to provide it together
    with a ``serialized_type``.
    """

    # Type accepted by this serializer; the subclasses in this file override it.
    serialized_type: object = AbstractField()

    def process_instance_value(self, value: Any, instance: Dict[str, Any]) -> str:
        """Type-check ``value`` and then serialize it.

        Raises:
            ValueError: If ``value`` is not of ``serialized_type``.
        """
        if isoftype(value, self.serialized_type):
            return self.serialize(value, instance)
        raise ValueError(
            f"SingleTypeSerializer for type {self.serialized_type} should get this type. got {to_type_string(value)}"
        )
51

52

53
class DefaultListSerializer(Serializer):
    """Serializer that joins list items with ``", "`` and stringifies anything else."""

    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return the comma-separated items of a list, or ``str(value)`` otherwise."""
        if not isinstance(value, list):
            return str(value)
        return ", ".join(map(str, value))
58

59

60
class ListSerializer(SingleTypeSerializer):
    """Serializer for ``list`` values: items are stringified and joined by ``", "``."""

    serialized_type = list

    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return the ``str`` of every item, separated by a comma and a space."""
        parts = [str(element) for element in value]
        return ", ".join(parts)
65

66

67
class DictAsJsonSerializer(SingleTypeSerializer):
    """Serializer for ``dict`` values that emits them as JSON text."""

    serialized_type = dict

    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return ``json.dumps(value)`` using the default ``json`` settings."""
        encoded = json.dumps(value)
        return encoded
72

73

74
class DialogSerializer(SingleTypeSerializer):
    """Serializer for ``Dialog`` values: one ``role: content`` line per turn."""

    serialized_type = Dialog

    def serialize(self, value: Dialog, instance: Dict[str, Any]) -> str:
        """Render every turn as ``"<role>: <content>"`` and join with newlines."""
        rendered_turns = []
        for turn in value:
            role, content = turn["role"], turn["content"]
            rendered_turns.append(f"{role}: {content}")
        return "\n".join(rendered_turns)
80

81

82
class NumberSerializer(SingleTypeSerializer):
    """Serializer for ``Number`` values: ints verbatim, floats with one decimal."""

    serialized_type = Number

    def serialize(self, value: Number, instance: Dict[str, Any]) -> str:
        """Return the textual form of ``value``.

        Raises:
            ValueError: If ``value`` is neither an ``int`` nor a ``float``.
        """
        # Integers are printed as they are.
        if isinstance(value, int):
            return str(value)
        if not isinstance(value, float):
            raise ValueError("Unsupported type for NumberSerializer")
        # Floats are shown with a single digit after the decimal point.
        return format(value, ".1f")
93

94

95
class NumberQuantizingSerializer(NumberSerializer):
    """Serializer that snaps a number to the nearest multiple of ``quantum``.

    With an integer ``quantum`` the result is printed as an integer; with a
    float ``quantum`` it is printed as a float.
    """

    serialized_type = Number
    # Step size that values are rounded to.
    quantum: Union[float, int] = 0.1

    def serialize(self, value: Number, instance: Dict[str, Any]) -> str:
        """Return ``value`` rounded to the nearest multiple of ``self.quantum``.

        Args:
            value: The number to quantize.
            instance: The instance the value belongs to (not used here).

        Raises:
            ValueError: If ``value`` is not a ``Number``.
        """
        if not isoftype(value, Number):
            raise ValueError("Unsupported type for NumberSerializer")

        # Number of whole quanta closest to the value.
        steps = round(value / self.quantum)

        if isinstance(self.quantum, int):
            # Exact integer arithmetic. Dividing by the float ``1 / quantum``
            # and truncating with ``int()`` can land one below the intended
            # multiple when the float quotient falls just under it.
            return str(steps * self.quantum)

        # Dividing by the reciprocal (instead of multiplying by the quantum)
        # avoids artifacts such as 3 * 0.1 == 0.30000000000000004.
        return str(steps / (1 / self.quantum))
106

107

108
class TableSerializer(SingleTypeSerializer):
    """Serializer for ``Table`` values that renders them as CSV text."""

    serialized_type = Table

    def serialize(self, value: Table, instance: Dict[str, Any]) -> str:
        """Return the table's header and rows as CSV, stripped of outer whitespace."""
        with io.StringIO() as buffer:
            csv_writer = csv.writer(buffer, lineterminator="\n")

            # Header line first, then one CSV line per table row.
            csv_writer.writerow(value["header"])
            for row in value["rows"]:
                csv_writer.writerow(row)

            csv_text = buffer.getvalue()
        return csv_text.strip()
121

122

123
class ImageSerializer(SingleTypeSerializer):
    """Serializer for ``Image`` values.

    The image payload is moved into ``instance["media"]["images"]`` and the
    value is replaced by a reference to its position in that list.
    """

    serialized_type = Image

    def serialize(self, value: Image, instance: Dict[str, Any]) -> str:
        """Store the image on the instance and return a tag pointing at it.

        Side effects: appends to ``instance["media"]["images"]`` (creating the
        containers when missing) and overwrites ``value["image"]`` with the
        ``media/images/<idx>`` reference.
        """
        if "media" not in instance:
            instance["media"] = {}
        media = instance["media"]
        if "images" not in media:
            media["images"] = []
        images = media["images"]

        # The new image's index is the list length before the append.
        idx = len(images)
        images.append({"image": value["image"], "format": value["format"]})

        location = f"media/images/{idx}"
        value["image"] = location
        return f'<{constants.image_tag} src="{location}">'
137

138

139
class VideoSerializer(ImageSerializer):
    """Serializer for ``Video`` values, handled as a sequence of images."""

    serialized_type = Video

    def serialize(self, value: Video, instance: Dict[str, Any]) -> str:
        """Serialize every image of the video and concatenate the results."""
        # Bind the parent implementation once; zero-argument ``super()`` is
        # resolved here, in the method body, not inside the comprehension.
        serialize_frame = super().serialize
        tags = [serialize_frame(frame, instance) for frame in value]
        return "".join(tags)
148

149

150
class DocumentSerializer(SingleTypeSerializer):
    """Serializer for ``Document`` values: a ``# title`` heading followed by the body."""

    serialized_type = Document

    def serialize(self, value: Document, instance: Dict[str, Any]) -> str:
        """Return the title as a ``#`` heading, a blank line, then the body."""
        title, body = value["title"], value["body"]
        return f"# {title}\n\n{body}"
155

156

157
class MultiDocumentSerializer(DocumentSerializer):
    """Serializer for ``MultiDocument`` values, i.e. an iterable of documents."""

    serialized_type = MultiDocument

    def serialize(self, value: MultiDocument, instance: Dict[str, Any]) -> str:
        """Serialize each document with the parent class and join with blank lines."""
        # Bind the parent implementation once, outside the comprehension.
        render_document = super().serialize
        rendered = [render_document(doc, instance) for doc in value]
        return "\n\n".join(rendered)
165

166

167

168
class ToolsSerializer(SingleTypeSerializer):
    """Serializer for a list of ``Tool`` definitions.

    Each tool is converted with ``convert_to_chat_api_format`` and collected
    under ``instance["__tools__"]``.
    """

    serialized_type = List[Tool]
    _requirements_list: List[str] = ["pydantic"]

    def serialize(self, value: List[Tool], instance: Dict[str, Any]) -> str:
        """Register the tools on the instance and return them as indented JSON.

        Args:
            value: The tools to serialize.
            instance: The instance that receives the converted tools.

        Returns:
            The JSON dump (``indent=4``) of the whole ``instance["__tools__"]``
            list, which includes any entries that were already there.

        Side effects: appends to ``instance["__tools__"]`` (created if absent)
        and overwrites each tool's ``"parameters"`` entry with the converted
        parameters.
        """
        if "__tools__" not in instance:
            instance["__tools__"] = []
        for tool in value:
            chat_api_tool = convert_to_chat_api_format(tool=tool)
            instance["__tools__"].append(chat_api_tool)
            # Keep the original tool in sync with the converted parameter schema.
            tool["parameters"] = chat_api_tool["function"]["parameters"]
        return json.dumps(instance["__tools__"], indent=4)
184

185
class ToolCallSerializer(SingleTypeSerializer):
    """Serializer for ``ToolCall`` values that emits them as JSON text."""

    serialized_type = ToolCall
    _requirements_list: List[str] = ["pydantic"]

    def serialize(self, value: ToolCall, instance: Dict[str, Any]) -> str:
        """Return ``json.dumps(value)`` using the default ``json`` settings."""
        encoded_call = json.dumps(value)
        return encoded_call
192

193
class MultiTypeSerializer(Serializer):
    """Serializer that dispatches to the first type-matching single-type serializer.

    Values that no serializer in ``serializers`` accepts are converted with
    ``str``.
    """

    # Tried in order; the first serializer whose ``serialized_type`` matches wins.
    # NOTE(review): DialogSerializer appears twice. The second entry is never
    # selected by ``serialize`` because the first one matches the same type.
    serializers: List[SingleTypeSerializer] = Field(
        default_factory=lambda: [
            DocumentSerializer(),
            ToolCallSerializer(),
            DialogSerializer(),
            MultiDocumentSerializer(),
            ImageSerializer(),
            VideoSerializer(),
            TableSerializer(),
            ToolsSerializer(),
            DialogSerializer(),
        ]
    )

    def verify(self):
        """Run the parent verification, then check the configured serializers."""
        super().verify()
        self._verify_serializers(self.serializers)

    def _verify_serializers(self, serializers):
        """Raise ``ValueError`` unless ``serializers`` is a ``List[SingleTypeSerializer]``."""
        if isoftype(serializers, List[SingleTypeSerializer]):
            return
        raise ValueError(
            "MultiTypeSerializer requires the list of serializers to be List[SingleTypeSerializer]."
        )

    def add_serializers(self, serializers: List[SingleTypeSerializer]):
        """Put ``serializers`` in front of the existing ones so they are tried first."""
        self._verify_serializers(serializers)
        self.serializers = [*serializers, *self.serializers]

    def serialize(self, value: Any, instance: Dict[str, Any]) -> Any:
        """Serialize ``value`` with the first matching serializer, else ``str(value)``."""
        matching = next(
            (
                candidate
                for candidate in self.serializers
                if isoftype(value, candidate.serialized_type)
            ),
            None,
        )
        if matching is None:
            return str(value)
        return matching.serialize(value, instance)
228

229

230
class SQLDatabaseAsSchemaSerializer(SingleTypeSerializer):
    """Serializer for ``SQLDatabase`` values that returns the database's table schema."""

    serialized_type = SQLDatabase

    def serialize(self, value: SQLDatabase, instance: Dict[str, Any]) -> str:
        """Build a connector for ``value["db_type"]`` and return its table schema."""
        # Imported inside the method, as in the original code.
        from .sql_utils import get_db_connector

        make_connector = get_db_connector(value["db_type"])
        return make_connector(value).get_table_schema()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc