• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SamhammerAG / ai-data-preprocessing-queue / 6941677447

21 Nov 2023 09:16AM UTC coverage: 91.566%. Remained the same
6941677447

push

github

web-flow
Merge pull request #11 from SamhammerAG/KIT-3065

KIT 3065 Update & Maintenance

49 of 62 new or added lines in 11 files covered. (79.03%)

1 existing line in 1 file now uncovered.

152 of 166 relevant lines covered (91.57%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.0
/ai_data_preprocessing_queue/Steps/spellcheck.py
1
from functools import reduce
1✔
2
from typing import Any, Dict, Optional, Set, cast
1✔
3

4
import numpy as np
1✔
5

6

7
def step(item: Any, item_state: Dict[str, Any], global_state: Optional[Dict[str, Any]], preprocessor_data: str) -> Any:
1✔
8
    if preprocessor_data is None:
1✔
UNCOV
9
        return item
×
10

11
    words = preprocessor_data.splitlines()
1✔
12

13
    if not words:
1✔
14
        return item
1✔
15

16
    values = {len(w) for w in words}
1✔
17
    grouped_replace_words = [{"key": key, "items": list(filter(lambda x: len(x) == key, words))} for key in values]
1✔
18
    all_item_words: Set[str] = set(item.split(" "))  # reduce all words
1✔
19
    # all words with more than 4 can have distance 2, al other 1
20

21
    for item_word in all_item_words:
1✔
22
        if item_word in words:
1✔
23
            continue
1✔
24

25
        length = len(item_word)
1✔
26
        items = [x.get("items") for x in grouped_replace_words if length - 2 <= cast(int, x.get("key")) <= length + 2]
1✔
27
        if not items:
1✔
28
            continue
1✔
29

30
        all_words_to_check: Any = reduce(lambda x, y: cast(str, x) + cast(str, y), items)
1✔
31

32
        for w in all_words_to_check:
1✔
33
            if len(item_word) < 4 and _levenshtein(item_word, w) == 1:
1✔
NEW
34
                item = item.replace(item_word, w)
×
35
            elif len(item_word) >= 4 and 1 <= _levenshtein(item_word, w) <= 2:
1✔
36
                item = item.replace(item_word, w)
1✔
37

38
    return item
1✔
39

40

41
def _levenshtein(seq1: str, seq2: str) -> int:
1✔
42
    size_x = len(seq1) + 1
1✔
43
    size_y = len(seq2) + 1
1✔
44
    matrix = np.zeros((size_x, size_y))
1✔
45
    for x in range(size_x):
1✔
46
        matrix[x, 0] = x
1✔
47
    for y in range(size_y):
1✔
48
        matrix[0, y] = y
1✔
49

50
    for x in range(1, size_x):
1✔
51
        for y in range(1, size_y):
1✔
52
            if seq1[x - 1] == seq2[y - 1]:
1✔
53
                matrix[x, y] = min(
1✔
54
                    matrix[x - 1, y] + 1,
55
                    matrix[x - 1, y - 1],
56
                    matrix[x, y - 1] + 1,
57
                )
58
            else:
59
                matrix[x, y] = min(
1✔
60
                    matrix[x - 1, y] + 1,
61
                    matrix[x - 1, y - 1] + 1,
62
                    matrix[x, y - 1] + 1,
63
                )
64

65
    return matrix[size_x - 1, size_y - 1]
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc