• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SamhammerAG / ai-data-preprocessing-queue / 18778840565

24 Oct 2025 11:48AM UTC coverage: 87.374% (-4.1%) from 91.515%
18778840565

Pull #14

github

cwehmeier
KIT-4469 fixed linting issue
Pull Request #14: KIT-4467 added signature removal as step

22 of 33 new or added lines in 1 file covered. (66.67%)

173 of 198 relevant lines covered (87.37%)

0.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

66.67
/ai_data_preprocessing_queue/Steps/remove_signature.py
1
import re
1✔
2

3

4
def remove_newline(text: str) -> str:
    """Collapse every run of whitespace into a single space and trim both ends.

    Newlines, carriage returns, tabs and repeated spaces are all treated alike.
    A single ``\\s+`` substitution is sufficient: the former two-step approach
    (first runs of two or more whitespace characters, then all runs) produced
    exactly the same result with an extra pass over the text.

    Args:
        text: The text to normalize.

    Returns:
        The text with all whitespace runs replaced by one space and without
        leading or trailing whitespace.
    """
    return re.sub(r"\s+", " ", text).strip()
1✔
11

12

13
# Closing formulas (English and German, the latter in both "ß" and "ss" spellings)
# that are treated as the start of a signature.
GreetingExpressions = [
    "sincerely", "best regards", "happy holidays", "kind regards", "warm regards", "cheers",
    "regards", "mit freundlichen grüßen", "freundliche grüße", "beste grüße", "viele grüße",
    "herzliche grüße", "liebe grüße", "mit freundlichen grüssen", "freundliche grüsse",
    "beste grüsse", "viele grüsse", "herzliche grüsse", "liebe grüsse",
]
# The leading \b keeps a closing from matching in the middle of a longer word
# (e.g. "regards" inside "disregards"), which would cut the message off there.
# No trailing boundary is required so that a closing glued to the sender's name
# ("Best regardsJohn") is still recognized.
greetings_regex = r"\b(" + "|".join(GreetingExpressions) + r")\s*,?\s*"


def remove_greetings_and_following_text(text: str) -> str:
    """Cut the text at the first closing formula and drop everything after it.

    Matching is case-insensitive. Because ``.*`` is used with ``re.DOTALL``,
    the first occurrence of any expression from ``GreetingExpressions`` and all
    text following it (across line breaks) is removed.

    Note that this is a heuristic: a closing word used inside the message body
    (for example "with regards to ...") also truncates the text at that point.

    Args:
        text: The message text.

    Returns:
        The text before the first closing formula, stripped of surrounding
        whitespace; the stripped input if no closing formula is found.
    """
    pattern = greetings_regex + ".*"
    return re.sub(pattern, "", text, flags=re.IGNORECASE | re.UNICODE | re.DOTALL).strip()
1✔
23

24

25
# thank you expressions should be removed after greetings and following signature text,
26
# as they often appear at the beginning of a message
27
THANK_EXPRESSIONS = [
1✔
28
    r"thank you(?: very much)?",   # thank you, thank you very much
29
    r"thankyou(?: very much)?",   # thankyou, thankyou very much
30
    r"thanks(?: a lot| again)?",   # thanks, thanks a lot, thanks again
31
    r"many thanks",                # many thanks
32
    r"a thousand thanks",          # a thousand thanks
33
    r"danke(?: schön)?",           # danke, danke schön, danke und
34
    r"vielen dank",                # vielen dank
35
    r"dankeschön",                 # dankeschön
36
    r"besten dank"                 # besten dank
37
]
38

39
# Suffixes which could follow thank you expressions
40
THANK_SUFFIXES = [
1✔
41
    r"(?:in advance(?: for (?:your|the) (?:help|support|understanding|assistance))?)",
42
    r"(?:for (?:your|the) (?:help|support|understanding|assistance))",
43
    r"(?:schon mal )?(?:im voraus)?(?: für (?:ihre|ihr|eure|die|den) (?:hilfe|support|verständnis))?",
44
    r"vorab",
45
    r"kindly?"
46
]
47

48
# Combine them into a final regex pattern and compile
49
thank_expressions = r"|".join(THANK_EXPRESSIONS)
1✔
50
suffixes = r"(?:\s+(?:" + r"|".join(THANK_SUFFIXES) + r"))?"
1✔
51
final_pattern = (
1✔
52
    r"\b(?:" + thank_expressions + r")" + suffixes + r"\s*(?:,|\.|!|;)?\s*"
53
)
54
thanking_regex = re.compile(final_pattern, flags=re.IGNORECASE | re.UNICODE)
1✔
55

56

57
def remove_thanking_expressions(text: str) -> str:
1✔
NEW
58
    return thanking_regex.sub("", text)
×
59

60

61
# Final clean-up: stand-alone greeting words that the preceding expressions
# could not reliably remove are matched as whole words by this pattern.
single_greeting_words = [
    "liebe grüße",
    "liebe grüsse",
    "grüße",
    "grüsse",
    "gruß",
    "gruss",
]
single_greetings_pattern = r"\b(?:" + "|".join(single_greeting_words) + r")\b"
1✔
65

66

67
def remove_single_greeting_words(text: str, pattern: str) -> str:
    """Replace every match of ``pattern`` in ``text`` with a single space.

    Matching is case-insensitive.

    Args:
        text: The message text.
        pattern: Regular expression source describing the words to remove.

    Returns:
        The text with each match substituted by one space character.
    """
    matcher = re.compile(pattern, flags=re.IGNORECASE | re.UNICODE)
    return matcher.sub(" ", text)
×
69

70

71
def step(text: str) -> str:
    """Strip closing formula, signature, thank-you phrases and greeting words.

    The text is passed through the removal helpers in a fixed order: closing
    formula with everything after it, thank-you expressions, remaining single
    greeting words, and finally whitespace normalization.

    Args:
        text: The message text; falsy input is returned unchanged.

    Returns:
        The cleaned text.

    Raises:
        ValueError: If any of the removal helpers raises; the original
            exception is attached as the cause.
    """
    if not text:
        return text
    try:
        cleaned = remove_greetings_and_following_text(text)
        cleaned = remove_thanking_expressions(cleaned)
        cleaned = remove_single_greeting_words(cleaned, single_greetings_pattern)
        return remove_newline(cleaned)
    except Exception as e:
        raise ValueError(f"An error occurred while removing signature: {e}") from e
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc