• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IBM / unitxt / 14911405902

08 May 2025 04:31PM UTC coverage: 80.074% (-0.07%) from 80.14%
14911405902

Pull #1773

github

web-flow
Merge e96fbbe15 into 2d15f20af
Pull Request #1773: Simplify tool calling base types

1645 of 2037 branches covered (80.76%)

Branch coverage included in aggregate %.

10250 of 12818 relevant lines covered (79.97%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.52
src/unitxt/serializers.py
1
import csv
1✔
2
import io
1✔
3
import json
1✔
4
from abc import abstractmethod
1✔
5
from typing import Any, Dict, List, Union
1✔
6

7
from .dataclass import AbstractField, Field
1✔
8
from .operators import InstanceFieldOperator
1✔
9
from .settings_utils import get_constants
1✔
10
from .type_utils import isoftype, to_type_string
1✔
11
from .types import (
1✔
12
    Dialog,
13
    Document,
14
    Image,
15
    MultiDocument,
16
    Number,
17
    SQLDatabase,
18
    Table,
19
    Tool,
20
    ToolCall,
21
    Video,
22
)
23

24
# Project-wide constants object; this module reads `constants.image_tag` from it.
constants = get_constants()
1✔
25

26

27
class Serializer(InstanceFieldOperator):
    """Base field operator whose per-value processing is delegated to ``serialize``."""

    def process_instance_value(self, value: Any, instance: Dict[str, Any]) -> str:
        """Forward ``value`` and ``instance`` to :meth:`serialize` and return its result."""
        serialized = self.serialize(value, instance)
        return serialized

    @abstractmethod
    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return the serialized form of ``value``.

        ``instance`` is the instance dict handed to ``process_instance_value``.
        Concrete subclasses override this method.
        """
34

35

36
class DefaultSerializer(Serializer):
    """Serializer that falls back to the built-in string conversion."""

    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return ``str(value)``; ``instance`` is not used."""
        text = str(value)
        return text
39

40

41
class SingleTypeSerializer(InstanceFieldOperator):
    """Field operator that only serializes values matching ``serialized_type``."""

    # The single type this serializer accepts; declared abstract here, set by subclasses.
    serialized_type: object = AbstractField()

    def process_instance_value(self, value: Any, instance: Dict[str, Any]) -> str:
        """Type-check ``value`` and hand it to ``self.serialize``.

        Raises:
            ValueError: if ``value`` is not of ``self.serialized_type``
                according to ``isoftype``.
        """
        if isoftype(value, self.serialized_type):
            return self.serialize(value, instance)

        # NOTE(review): to_type_string receives the value itself rather than
        # type(value) -- confirm that is what the helper expects.
        raise ValueError(
            f"SingleTypeSerializer for type {self.serialized_type} should get this type. got {to_type_string(value)}"
        )
50

51

52
class DefaultListSerializer(Serializer):
    """Serializer that joins list items with ``", "`` and stringifies anything else."""

    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return a comma-separated string for lists, ``str(value)`` otherwise."""
        if not isinstance(value, list):
            return str(value)
        return ", ".join(map(str, value))
57

58

59
class ListSerializer(SingleTypeSerializer):
    """Serialize a ``list`` as its items' string forms joined by ``", "``."""

    serialized_type = list

    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return the items of ``value`` converted with ``str`` and comma-joined."""
        return ", ".join(map(str, value))
64

65

66
class DictAsJsonSerializer(SingleTypeSerializer):
    """Serialize a ``dict`` as a JSON string."""

    serialized_type = dict

    def serialize(self, value: Any, instance: Dict[str, Any]) -> str:
        """Return ``value`` encoded by ``json.dumps`` with default options."""
        encoded = json.dumps(value)
        return encoded
71

72

73
class DialogSerializer(SingleTypeSerializer):
    """Serialize a dialog as one ``role: content`` line per turn."""

    serialized_type = Dialog

    def serialize(self, value: Dialog, instance: Dict[str, Any]) -> str:
        """Return the turns of ``value`` rendered as newline-separated lines.

        Each turn is read through its ``"role"`` and ``"content"`` keys.
        """
        lines = []
        for turn in value:
            lines.append(f"{turn['role']}: {turn['content']}")
        return "\n".join(lines)
79

80

81
class NumberSerializer(SingleTypeSerializer):
    """Serialize numbers: ints verbatim, floats with one decimal place."""

    serialized_type = Number

    def serialize(self, value: Number, instance: Dict[str, Any]) -> str:
        """Return the string form of ``value``.

        Raises:
            ValueError: if ``value`` is neither an ``int`` nor a ``float``.
        """
        # A value cannot be both int and float, so the check order is free.
        if isinstance(value, float):
            return format(value, ".1f")
        if isinstance(value, int):
            return str(value)
        raise ValueError("Unsupported type for NumberSerializer")
92

93

94
class NumberQuantizingSerializer(NumberSerializer):
    """Serialize a number after snapping it to a multiple of ``quantum``."""

    serialized_type = Number
    # Step size used for snapping; an int quantum also makes the output an int.
    quantum: Union[float, int] = 0.1

    def serialize(self, value: Number, instance: Dict[str, Any]) -> str:
        """Return ``value`` quantized to ``self.quantum`` as a string.

        Raises:
            ValueError: if ``value`` is not a ``Number`` according to ``isoftype``.
        """
        if not isoftype(value, Number):
            raise ValueError("Unsupported type for NumberSerializer")

        step = self.quantum
        # Count whole steps, then scale back (dividing by the reciprocal).
        snapped = round(value / step) / (1 / step)
        if isinstance(step, int):
            snapped = int(snapped)
        return str(snapped)
105

106

107
class TableSerializer(SingleTypeSerializer):
    """Serialize a table (``"header"`` plus ``"rows"``) as CSV text."""

    serialized_type = Table

    def serialize(self, value: Table, instance: Dict[str, Any]) -> str:
        """Return ``value`` as CSV: header line first, then one line per row.

        Lines end with ``"\\n"`` and the final text is stripped of leading and
        trailing whitespace.
        """
        with io.StringIO() as buffer:
            csv_writer = csv.writer(buffer, lineterminator="\n")
            csv_writer.writerow(value["header"])
            csv_writer.writerows(value["rows"])
            csv_text = buffer.getvalue()
        return csv_text.strip()
120

121

122
class ImageSerializer(SingleTypeSerializer):
    """Move image data into ``instance["media"]["images"]`` and emit a tag referencing it."""

    serialized_type = Image

    def serialize(self, value: Image, instance: Dict[str, Any]) -> str:
        """Register the image on the instance and return its reference tag.

        Side effects:
            * ``instance["media"]["images"]`` is created if missing and gains
              one ``{"image", "format"}`` entry.
            * ``value["image"]`` is overwritten with the ``media/images/<idx>``
              reference string.
        """
        media = instance.setdefault("media", {})
        images = media.setdefault("images", [])

        # The new entry's index is the list length before appending.
        idx = len(images)
        images.append({"image": value["image"], "format": value["format"]})

        value["image"] = f"media/images/{idx}"
        return f'<{constants.image_tag} src="media/images/{idx}">'
136

137

138
class VideoSerializer(ImageSerializer):
    """Serialize a video by serializing each of its images in order."""

    serialized_type = Video

    def serialize(self, value: Video, instance: Dict[str, Any]) -> str:
        """Return the concatenated image tags for every item of ``value``."""
        # Bind the parent implementation first: zero-argument super() is not
        # usable inside a comprehension's own scope on every Python version.
        serialize_image = super().serialize
        tags = [serialize_image(frame, instance) for frame in value]
        return "".join(tags)
147

148

149
class DocumentSerializer(SingleTypeSerializer):
    """Serialize a document as a ``# title`` heading followed by its body."""

    serialized_type = Document

    def serialize(self, value: Document, instance: Dict[str, Any]) -> str:
        """Return the title line, a blank line, then the body of ``value``."""
        rendered = f"# {value['title']}\n\n{value['body']}"
        return rendered
154

155

156
class MultiDocumentSerializer(DocumentSerializer):
    """Serialize several documents, separated by a blank line."""

    serialized_type = MultiDocument

    def serialize(self, value: MultiDocument, instance: Dict[str, Any]) -> str:
        """Return every document of ``value`` rendered by the parent class and joined with ``"\\n\\n"``."""
        # Bind the parent implementation first: zero-argument super() is not
        # usable inside a comprehension's own scope on every Python version.
        serialize_document = super().serialize
        rendered = [serialize_document(doc, instance) for doc in value]
        return "\n\n".join(rendered)
164

165

166

167
class ToolsSerializer(SingleTypeSerializer):
    """Serialize a list of tools as JSON and record them on the instance."""

    serialized_type = List[Tool]

    def serialize(self, value: List[Tool], instance: Dict[str, Any]) -> str:
        """Register ``value`` under ``instance["__tools__"]`` and return that list as JSON.

        Args:
            value: Tools to register; each is wrapped as
                ``{"type": "function", "function": tool}``.
            instance: Instance dict. Its ``"__tools__"`` list is created when
                missing and extended in place.

        Returns:
            ``instance["__tools__"]`` dumped with ``json.dumps(..., indent=4)``.
            This is the whole accumulated list, so entries that were already
            present before the call are included too.
        """
        registered = instance.setdefault("__tools__", [])
        registered.extend({"type": "function", "function": tool} for tool in value)
        return json.dumps(registered, indent=4)
180

181
class ToolCallSerializer(SingleTypeSerializer):
    """Serialize a tool call as a JSON string."""

    serialized_type = ToolCall

    def serialize(self, value: ToolCall, instance: Dict[str, Any]) -> str:
        """Return ``value`` encoded by ``json.dumps`` with default options."""
        encoded = json.dumps(value)
        return encoded
187

188
class MultiTypeSerializer(Serializer):
    """Serializer that dispatches to the first sub-serializer matching the value's type.

    Values matched by no sub-serializer are converted with ``str``.
    """

    # Tried in order; the first entry whose ``serialized_type`` matches wins,
    # so listing the same serializer twice has no effect on dispatch.
    serializers: List[SingleTypeSerializer] = Field(
        default_factory=lambda: [
            DocumentSerializer(),
            ToolCallSerializer(),
            DialogSerializer(),
            MultiDocumentSerializer(),
            ImageSerializer(),
            VideoSerializer(),
            TableSerializer(),
            ToolsSerializer(),
        ]
    )

    def verify(self):
        """Run the parent verification, then validate ``self.serializers``."""
        super().verify()
        self._verify_serializers(self.serializers)

    def _verify_serializers(self, serializers):
        """Check that ``serializers`` is a ``List[SingleTypeSerializer]``.

        Raises:
            ValueError: if ``isoftype`` rejects the list.
        """
        if not isoftype(serializers, List[SingleTypeSerializer]):
            raise ValueError(
                "MultiTypeSerializer requires the list of serializers to be List[SingleTypeSerializer]."
            )

    def add_serializers(self, serializers: List[SingleTypeSerializer]):
        """Prepend ``serializers`` so they take precedence over the existing ones.

        Raises:
            ValueError: if ``serializers`` is not a ``List[SingleTypeSerializer]``.
        """
        self._verify_serializers(serializers)
        self.serializers = serializers + self.serializers

    def serialize(self, value: Any, instance: Dict[str, Any]) -> Any:
        """Serialize ``value`` with the first matching sub-serializer.

        Each candidate's ``serialize`` is called directly once ``isoftype``
        confirms the type match. Falls back to ``str(value)`` when nothing
        matches.
        """
        for serializer in self.serializers:
            if isoftype(value, serializer.serialized_type):
                return serializer.serialize(value, instance)

        return str(value)
223

224

225
class SQLDatabaseAsSchemaSerializer(SingleTypeSerializer):
    """Serializes a database schema into a string representation."""

    serialized_type = SQLDatabase

    def serialize(self, value: SQLDatabase, instance: Dict[str, Any]) -> str:
        """Return the table schema reported by the connector for ``value``.

        The connector class is looked up from ``value["db_type"]`` and
        constructed with ``value`` itself.
        """
        # Function-scope import kept as in the original -- presumably to avoid
        # an import cycle or import-time cost; confirm before hoisting.
        from .sql_utils import get_db_connector

        connector_cls = get_db_connector(value["db_type"])
        db_connector = connector_cls(value)
        return db_connector.get_table_schema()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc