• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IBM / unitxt / 13373423166

17 Feb 2025 03:16PM UTC coverage: 81.082% (+0.01%) from 81.07%
13373423166

Pull #1603

github

web-flow
Merge ec558d994 into fe79da35a
Pull Request #1603: Enable local caching with UNITXT_LOCAL_CACHE

1497 of 1840 branches covered (81.36%)

Branch coverage included in aggregate %.

9505 of 11729 relevant lines covered (81.04%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.67
src/unitxt/string_operators.py
1
import re
1✔
2
from typing import (
1✔
3
    Any,
4
    Dict,
5
    List,
6
    Optional,
7
)
8

9
from .operators import FieldOperator, InstanceOperator
1✔
10
from .settings_utils import get_settings
1✔
11

12
# Module-level settings object; `settings.local_cache` is read below as the
# `cache_dir` argument when the tokenizer-based operators load their tokenizer.
settings = get_settings()
1✔
13

14
class Split(FieldOperator):
    """Split a string field into a list of substrings on a fixed separator."""

    # Separator handed to ``str.split``.
    by: str

    def process_value(self, value: str) -> List[str]:
        """Return the pieces of ``value`` delimited by ``self.by``."""
        separator = self.by
        pieces = value.split(separator)
        return pieces
1✔
19

20

21
class RegexSplit(FieldOperator):
    """Split a string field into a list of substrings on a regular expression."""

    # Regular expression whose matches act as the delimiters.
    by: str

    def process_value(self, value: str) -> List[str]:
        """Return the pieces of ``value`` separated by matches of ``self.by``."""
        delimiter = re.compile(self.by)
        return delimiter.split(value)
1✔
26

27

28
class TokensSplit(FieldOperator):
    """Tokenize a string field with a pretrained tokenizer."""

    # Identifier passed to ``AutoTokenizer.from_pretrained``.
    model: str
    _requirements_list = ["transformers"]

    def prepare(self):
        """Load the tokenizer for ``self.model`` and keep it on the instance."""
        super().prepare()
        # Function-scope import: ``transformers`` is only imported when
        # ``prepare`` actually runs.
        from transformers import AutoTokenizer

        cache_dir = settings.local_cache
        tokenizer = AutoTokenizer.from_pretrained(self.model, cache_dir=cache_dir)
        self.tokenizer = tokenizer

    def process_value(self, value: str) -> List[str]:
        """Return the tokens produced by ``self.tokenizer.tokenize`` for ``value``."""
        tokens = self.tokenizer.tokenize(value)
        return tokens
1✔
40

41

42
class TokensSlice(FieldOperator):
    """Keep a slice of a string field, measured in tokenizer token ids.

    The value is encoded with the tokenizer, the encoded sequence is sliced
    with ``start``/``stop``/``step``, and the slice is decoded back to text.
    """

    # Identifier passed to ``AutoTokenizer.from_pretrained``.
    model: str
    # Slice bounds applied to the encoded sequence; ``None`` leaves a bound open.
    start: Optional[int] = None
    stop: Optional[int] = None
    step: Optional[int] = None

    _requirements_list = ["transformers"]

    def prepare(self):
        """Load the tokenizer for ``self.model`` and keep it on the instance."""
        super().prepare()
        # Function-scope import: ``transformers`` is only imported when
        # ``prepare`` actually runs.
        from transformers import AutoTokenizer

        cache_dir = settings.local_cache
        tokenizer = AutoTokenizer.from_pretrained(self.model, cache_dir=cache_dir)
        self.tokenizer = tokenizer

    def process_value(self, value: str) -> str:
        """Encode ``value``, slice the encoded sequence, and decode the result."""
        token_ids = self.tokenizer.encode(value)
        window = slice(self.start, self.stop, self.step)
        return self.tokenizer.decode(token_ids[window])
1✔
61

62

63
class Join(FieldOperator):
    """Concatenate a list-of-strings field into one string."""

    # Separator inserted between consecutive items.
    by: str

    def process_value(self, value: List[str]) -> str:
        """Return the items of ``value`` joined with ``self.by``."""
        separator = self.by
        joined = separator.join(value)
        return joined
1✔
68

69

70
class FormatText(InstanceOperator):
    """Render a ``str.format`` template from an instance's fields.

    The instance dict is expanded as keyword arguments to ``text.format`` and
    the rendered string is written back into the same instance.
    """

    # Key of the instance under which the rendered text is stored.
    to_field: str
    # ``str.format`` template; its named placeholders are looked up in the instance.
    text: str

    def process(
        self, instance: Dict[str, Any], stream_name: Optional[str] = None
    ) -> Dict[str, Any]:
        """Store the rendered template in ``instance[self.to_field]`` and return ``instance``."""
        rendered = self.text.format(**instance)
        instance[self.to_field] = rendered
        return instance
×
79

80

81
class Strip(FieldOperator):
    """Remove leading and trailing whitespace from a string field."""

    def process_value(self, value: str) -> str:
        """Return ``value.strip()``."""
        stripped = value.strip()
        return stripped
×
84

85

86
class Replace(FieldOperator):
    """Replace every occurrence of one substring with another in a string field."""

    # Substring to search for.
    old: str
    # Substring substituted for each occurrence of ``old``.
    new: str

    def process_value(self, value: str) -> str:
        """Return ``value`` with each ``self.old`` replaced by ``self.new``."""
        target, substitute = self.old, self.new
        return value.replace(target, substitute)
×
92

93

94
class MapReplace(FieldOperator):
    """Apply several substring replacements to a field, one after another."""

    # Maps each substring to search for to its substitute. Replacements are
    # applied in the mapping's iteration order, each on the previous result.
    mapping: Dict[str, str]

    def process_value(self, value: Any) -> Any:
        """Return ``value`` after applying every replacement in ``self.mapping``."""
        result = value
        for target, substitute in self.mapping.items():
            result = result.replace(target, substitute)
        return result
×
101

102

103
class RegexReplace(FieldOperator):
    """Replace every match of a regular expression in a string field.

    Values that are not strings are returned unchanged.
    """

    pattern: str  # A regex pattern
    replacement: str  # The replacement string or template

    def prepare(self):
        """Compile ``pattern`` once; an invalid pattern raises ``re.error`` here."""
        super().prepare()
        # Keep the compiled regex in its own attribute. Overwriting ``pattern``
        # would leave a field declared as ``str`` holding an ``re.Pattern``.
        self._compiled_pattern = re.compile(self.pattern)

    def process_value(self, value: Any) -> Any:
        """Return ``value`` with all matches of ``pattern`` replaced by ``replacement``.

        Args:
            value: The field value; only ``str`` values are rewritten.

        Returns:
            The substituted string, or ``value`` itself when it is not a string.
        """
        if not isinstance(value, str):
            return value  # If not a string, return the value as is
        regex = getattr(self, "_compiled_pattern", None)
        if regex is None:
            # ``prepare`` has not run on this instance; compile on demand.
            regex = re.compile(self.pattern)
        return regex.sub(self.replacement, value)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc