• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SamhammerAG / ai-data-preprocessing-queue / 13242820435

10 Feb 2025 02:12PM UTC coverage: 91.515% (-0.05%) from 91.566%
13242820435

push

github

web-flow
Merge pull request #13 from SamhammerAG/KIT-4026

KIT-4026 update

151 of 165 relevant lines covered (91.52%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.25
/ai_data_preprocessing_queue/Steps/token_replacement.py
1
import re
1✔
2
from typing import Any
1✔
3

4

5
# The higher the number, the higher the priority: higher-priority replacements are applied first.
6
def step(item: Any, item_state: dict[str, Any], global_state: dict[str, Any] | None, preprocessor_data: str) -> Any:
    """Replace configured tokens in ``item`` with their replacement text.

    Args:
        item: Text to process; it is passed to ``re.sub`` as the subject string.
        item_state: Not used by this step.
        global_state: Optional dict forwarded to ``_get_data_from_store_or_reload``
            together with ``preprocessor_data``; may be ``None``.
        preprocessor_data: Raw rule text. When it is ``None`` or empty, ``item``
            is returned unchanged.

    Returns:
        ``item`` after all rules were applied, in the order in which
        ``_get_data_from_store_or_reload`` returned them.
    """
    if not preprocessor_data:
        return item

    rules = _get_data_from_store_or_reload(global_state, preprocessor_data)

    for rule in rules:
        token, replacement = rule[0], rule[1]

        # The token is matched literally and must start at a word boundary.
        regex = r"\b" + re.escape(token)

        # A token ending in "." gets no trailing boundary: "\b" would not
        # match between the dot and a following non-word character such as
        # a space, so the token could never be replaced at the end of a word.
        if not token.endswith("."):
            regex += r"\b"

        # Use a callable so the replacement is inserted verbatim. A plain
        # string would be parsed as a regex template, where backslashes
        # (e.g. "C:\dir" or "\1") raise re.error or get rewritten.
        item = re.sub(regex, lambda _match, text=replacement: text, item)

    return item
1✔
24

25

26
def _get_data_from_store_or_reload(global_state: dict[str, Any] | None, preprocessor_data: str) -> list[list[str]]:
    """Return the parsed replacement rules, reusing ``global_state`` as a cache.

    Without a ``global_state`` the rules are parsed from ``preprocessor_data``
    on every call. Otherwise the parsed rules are stored in ``global_state``
    under a fixed key on first use and returned from there afterwards.
    """
    if global_state is None:
        return _prepare_pre_processor_data(preprocessor_data)

    cache_key = "tokenReplacementpreprocessor_data"
    if cache_key not in global_state:
        # First call with this state: parse once and remember the result.
        parsed_rules = _prepare_pre_processor_data(preprocessor_data)
        global_state[cache_key] = parsed_rules
        return parsed_rules

    return global_state[cache_key]
×
37

38

39
def _prepare_pre_processor_data(preprocessor_data: str) -> list[list[str]]:
1✔
40
    lines: list[list[str]] = [
1✔
41
        [s.strip() for i, s in enumerate(line.split(",")) if (i == 2 and re.compile(r"^[0-9\s]+$").match(s)) or i < 2]
42
        for line in preprocessor_data.splitlines()
43
        if line.count(",") == 2
44
    ]
45
    lines = [line for line in lines if len(line) == 3]
1✔
46

47
    i: int = 0
1✔
48
    while i < len(lines):
1✔
49
        lines[i][2] = int(lines[i][2])  # type: ignore
1✔
50
        i += 1
1✔
51

52
    # sort
53
    lines = sorted(lines, key=lambda f: 0 - f[2])  # type: ignore
1✔
54

55
    return lines
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc